当前位置:网站首页>Method of converting VOC format data set to Yolo format data set
Method of converting VOC format data set to Yolo format data set
2022-06-11 11:08:00 【Saga】
Most open-source datasets today are published in VOC format, but we often need them in a different format. Does that mean relabeling every image by hand? Fortunately not: a short script is enough to convert a VOC-format dataset directly into a YOLO-format dataset. The code below automatically sorts the images and their corresponding labels into separate training-set and validation-set folders, and the split ratio can be customized. The complete code follows:
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
import random
from shutil import copyfile
# Class names to export. An object's position in this list becomes its YOLO
# class id, and objects whose <name> is not listed here are skipped.
classes = ["Before a whole","After a whole","Chest former","Chest after","Raise hand before","Raise hand after","Global left position","Global right position","Front face","Left face","Right face"]
# Example for a single-class dataset: classes = ["ball"]
# Approximate percentage of images assigned to the training set; the remaining
# images go to the validation set (80 means roughly an 8:2 train:val split).
TRAIN_RATIO = 80
def clear_hidden_files(path):
    """Recursively delete files whose names start with "._" below *path*.

    Such files are typically metadata side-cars (e.g. the AppleDouble files
    macOS writes on non-native file systems) and would otherwise be picked up
    as images or labels.

    Args:
        path: Directory to clean. Sub-directories are cleaned as well.

    Raises:
        OSError: If *path* cannot be listed or a matching file cannot be
            removed.
    """
    base = os.path.abspath(path)
    for entry in os.listdir(path):
        abspath = os.path.join(base, entry)
        if os.path.isfile(abspath):
            if entry.startswith("._"):
                os.remove(abspath)
        elif os.path.isdir(abspath):
            # Only descend into real directories: dangling symlinks and
            # special files are neither file nor directory, and listing them
            # would raise.
            clear_hidden_files(abspath)
def convert(size, box):
    """Turn a VOC pixel box into a normalised YOLO box.

    Args:
        size: ``(image_width, image_height)`` in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels (note the x-first order).

    Returns:
        ``(x_center, y_center, width, height)``, each scaled by the
        corresponding image dimension.
    """
    # Scale by the reciprocal (rather than dividing) to keep the exact same
    # floating-point results.
    inv_width = 1. / size[0]
    inv_height = 1. / size[1]
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]
    center_x = (x_min + x_max) / 2.0
    center_y = (y_min + y_max) / 2.0
    span_x = x_max - x_min
    span_y = y_max - y_min
    return (
        center_x * inv_width,
        center_y * inv_height,
        span_x * inv_width,
        span_y * inv_height,
    )
def convert_annotation(image_id):
    """Convert one VOC XML annotation into a YOLO label file.

    Reads ``VOCdevkit/VOC2007/Annotations/<image_id>.xml`` and writes
    ``VOCdevkit/VOC2007/YOLOLabels/<image_id>.txt`` (both relative to the
    current working directory). Each kept object becomes one line:
    ``<class_id> <x_center> <y_center> <width> <height>``.

    Objects are skipped when their ``<name>`` is not in the module-level
    ``classes`` list or when they are flagged ``<difficult>1</difficult>``.
    A missing or empty ``<difficult>`` element is treated as "not difficult".

    Args:
        image_id: Annotation file name without the ``.xml`` extension.

    Raises:
        OSError: If the annotation cannot be read or the label file cannot
            be written.
        xml.etree.ElementTree.ParseError: If the annotation is not valid XML.
    """
    xml_path = 'VOCdevkit/VOC2007/Annotations/%s.xml' % image_id
    txt_path = 'VOCdevkit/VOC2007/YOLOLabels/%s.txt' % image_id

    # Parse from the path: ElementTree then honours the encoding declared in
    # the XML and closes the file itself, even when parsing fails. Parsing
    # happens before the label file is opened so a broken annotation does not
    # leave an empty .txt behind.
    root = ET.parse(xml_path).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    with open(txt_path, 'w') as out_file:
        for obj in root.iter('object'):
            difficult_node = obj.find('difficult')
            is_difficult = (
                difficult_node is not None
                and difficult_node.text is not None
                and int(difficult_node.text) == 1
            )
            cls = obj.find('name').text
            if cls not in classes or is_difficult:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects (xmin, xmax, ymin, ymax).
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
# ---------------------------------------------------------------------------
# Script body: build the YOLOv5 folder layout under ./VOCdevkit, convert every
# annotated image and randomly assign it to the training or validation set.
# ---------------------------------------------------------------------------
wd = os.getcwd()

# VOC input folders. The trailing separators are kept so the image paths
# written to the list files keep the same form as before.
data_base_dir = os.path.join(wd, "VOCdevkit/")
work_sapce_dir = os.path.join(data_base_dir, "VOC2007/")
annotation_dir = os.path.join(work_sapce_dir, "Annotations/")
image_dir = os.path.join(work_sapce_dir, "JPEGImages/")
yolo_labels_dir = os.path.join(work_sapce_dir, "YOLOLabels/")

# YOLOv5 output folders.
yolov5_images_dir = os.path.join(data_base_dir, "images/")
yolov5_labels_dir = os.path.join(data_base_dir, "labels/")
yolov5_images_train_dir = os.path.join(yolov5_images_dir, "train/")
yolov5_images_test_dir = os.path.join(yolov5_images_dir, "val/")
yolov5_labels_train_dir = os.path.join(yolov5_labels_dir, "train/")
yolov5_labels_test_dir = os.path.join(yolov5_labels_dir, "val/")

# Create each folder (parents included) and strip "._" files from it.
for _required_dir in (
    annotation_dir,
    image_dir,
    yolo_labels_dir,
    yolov5_images_dir,
    yolov5_labels_dir,
    yolov5_images_train_dir,
    yolov5_images_test_dir,
    yolov5_labels_train_dir,
    yolov5_labels_test_dir,
):
    os.makedirs(_required_dir, exist_ok=True)
    clear_hidden_files(_required_dir)

list_imgs = os.listdir(image_dir)  # list image files

# Opening with 'w' truncates list files left over from a previous run; the
# context manager closes both files even if a conversion raises.
with open(os.path.join(wd, "yolov5_train.txt"), 'w') as train_file, \
        open(os.path.join(wd, "yolov5_val.txt"), 'w') as test_file:
    for image_name in list_imgs:
        image_path = os.path.join(image_dir, image_name)
        if not os.path.isfile(image_path):
            continue

        nameWithoutExtention = os.path.splitext(image_name)[0]
        annotation_path = os.path.join(annotation_dir, nameWithoutExtention + '.xml')
        if not os.path.exists(annotation_path):
            # Images without a VOC annotation are left out of both sets.
            continue

        label_name = nameWithoutExtention + '.txt'
        label_path = os.path.join(yolo_labels_dir, label_name)

        prob = random.randint(1, 100)
        print("Probability: %d" % prob)
        # randint is inclusive on both ends, so "<=" sends exactly
        # TRAIN_RATIO of the 100 possible values to the training set
        # ("<" would send only TRAIN_RATIO - 1 of them).
        if prob <= TRAIN_RATIO:  # train dataset
            list_file = train_file
            images_out_dir = yolov5_images_train_dir
            labels_out_dir = yolov5_labels_train_dir
        else:  # validation dataset
            list_file = test_file
            images_out_dir = yolov5_images_test_dir
            labels_out_dir = yolov5_labels_test_dir

        convert_annotation(nameWithoutExtention)  # writes label_path
        list_file.write(image_path + '\n')
        copyfile(image_path, os.path.join(images_out_dir, image_name))
        copyfile(label_path, os.path.join(labels_out_dir, label_name))
Some readers may be unsure how the files relate to one another: how the original VOC-format dataset should be laid out, and where the newly generated YOLO-format dataset will be stored. The explanation below answers these questions.
First, copy the VOC-format dataset folder (VOCdevkit) into the directory from which the code above is run, as shown below:
The contents of the VOCdevkit folder are shown below:
After running the code above, you get a new YOLO-format dataset. The file layout of the new dataset is shown below:

In the picture above, I have marked which folders hold the training set and the validation set, and which folders store the corresponding labels.
That is the complete method for converting a VOC-format dataset to a YOLO-format dataset. I hope it helps those of you who, like me, are losing hair over this. Thank you for your support!
边栏推荐
- 英文论文阅读知识总结
- 数据库系统概论 ---- 第二章 -- 关系数据库(2.1~2.3)(重要知识点)
- 想做钢铁侠?听说很多大佬都是用它入门的
- 2022 Beijing International Nutrition and Health Industry Expo, the 9th China Great Health Industry Exhibition
- 数字藏品app小程序公众号源码
- 杰理之获取 BLE 出现电压检测、ADC 检测不准【篇】
- Development of official account system for digital collection app applet
- Rxjs Observable. Execute logical analysis of pipe passing in multiple operators
- [K-means] K-means learning examples
- 使用Yolov3训练自己制作数据集,快速上手
猜你喜欢

How programmers do sidelines

杰理之获取 BLE 查看代码异常复位等异常情况原因【篇】

AI security and Privacy Forum issue 11 - stable learning: finding common ground between causal reasoning and machine learning

Xiao P weekly Vol.08

Interpretation of cube technology | past and present life of cube Rendering Design

MySQL (IX)

Mn Monet pagoda host system v1.5 release

杰理之获取 BLE 区分复位跟唤醒【篇】

Report on various activity plans of safety month 2022 (28 pages)

杰理之获取 BLE OTA 双备份升级(只能用于 4Mbits 以上的芯片)【篇】
随机推荐
985高校博士因文言文致谢走红!导师评价其不仅SCI写得好...
Migrating minini to littlefs
Where is it safer to open an account for soda ash futures? How much capital is needed to buy soda ash futures?
2022健博会,北京大健康产业展,艾灸健康展,北京健康服务展
杰理之BLEPR0 和 PR1 当普通 IO 口使用【篇】
杰理之获取 BLE OTA 双备份升级(只能用于 4Mbits 以上的芯片)【篇】
Jerry's ble chip power supply range and anti burn chip measures [chapter]
SWUST oj668: the thief ran away
装饰模式--小美的生日蛋糕
使用Yolov5训练自己制作的数据集,快速上手
网上开户是安全的吗?普通人可以开吗?
Using hystrix to implement fault-tolerant processing of microservices
杰理之获取 BLE 区分复位跟唤醒【篇】
Interpreting USB3.0 test items
SAP Spartacus Reference App Structure
使用Yolov5训练好模型调用电脑自带摄像头时出现问题:TypeError: argument of type “int‘ is not iterable的解决方法
使用Yolov3训练自己制作数据集,快速上手
数字藏品app小程序公众号源码
Writing the program into the microcontroller can control the forward and reverse rotation of the motor more conveniently and quickly
沒有財富就不能自由嗎?