当前位置:网站首页>yolov5 xml数据集转换为VOC数据集
yolov5 xml数据集转换为VOC数据集
2022-07-04 07:52:00 【雨浅听风吟】
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
from pathlib import Path
from xml.dom.minidom import parse
from shutil import copyfile
import os
# Class names used by the annotations. A name's position in this list is the
# integer class id written to the label files (looked up with classes.index
# in save_txt_file).
classes = ['helmet','head','person']
def convert_annot(size, box):
    """Convert a pixel-space corner box into a normalized center/size box.

    Args:
        size: ``(width, height)`` of the image in pixels. Each entry is passed
            through ``int()``, so numeric strings are accepted.
        box: ``(xmin, ymin, xmax, ymax)`` in pixels. Each entry is passed
            through ``int()``.

    Returns:
        ``[x_center, y_center, width, height]`` as Python floats, where the
        x/width values are divided by the image width and the y/height values
        by the image height.

    Raises:
        ZeroDivisionError: If the image width or height is zero.
    """
    img_w = int(size[0])
    img_h = int(size[1])
    x1, y1, x2, y2 = (int(v) for v in box[:4])

    box_w = x2 - x1
    box_h = y2 - y1
    x_center = x1 + box_w / 2
    y_center = y1 + box_h / 2

    # Divide directly in double precision. Scaling by a float32 reciprocal
    # (as done previously) injects ~1e-8 rounding error into every value and
    # makes the result type depend on the installed NumPy version.
    return [
        x_center / img_w,
        y_center / img_h,
        box_w / img_w,
        box_h / img_h,
    ]
def save_txt_file(img_jpg_file_name, size, img_box,
                  labels_dir="./Safety_Helmet_Detection_datasets_xml/labels/"):
    """Append one label line per box to ``<labels_dir>/<img_jpg_file_name>.txt``.

    Each line has the form ``<class_id> <x> <y> <w> <h>``, where the class id
    is the index of the box's class name in the module-level ``classes`` list
    and the four numbers are the values returned by ``convert_annot``.

    Args:
        img_jpg_file_name: Base name (no extension) used for the label file.
        size: ``[width, height]`` of the image, forwarded to ``convert_annot``.
        img_box: Iterable of ``[class_name, xmin, ymin, xmax, ymax]`` entries.
        labels_dir: Directory that receives the label file. Defaults to the
            location the script has always written to.

    Raises:
        ValueError: If a class name is not present in ``classes``.
    """
    save_file_name = os.path.join(labels_dir, img_jpg_file_name + ".txt")

    # Format every line before touching the file so that an unknown class
    # name cannot leave a half-written label file behind.
    lines = []
    for box in img_box:
        cls_num = classes.index(box[0])
        new_box = convert_annot(size, box[1:])
        lines.append(
            f"{cls_num} {new_box[0]} {new_box[1]} {new_box[2]} {new_box[3]}\n"
        )

    # NOTE: append mode is kept from the original script, so converting the
    # same image twice duplicates its lines; clear the labels directory
    # before re-running the conversion.
    with open(save_file_name, "a+") as file_path:
        file_path.writelines(lines)
def _tag_text(node, tag):
    """Return the text of the first ``<tag>`` element found under ``node``."""
    return node.getElementsByTagName(tag)[0].childNodes[0].data


def get_xml_data(file_path, img_xml_file):
    """Parse one annotation XML file and write its boxes with ``save_txt_file``.

    Reads ``<file_path>/<img_xml_file>.xml``, takes the image width and height
    from its ``<size>`` element and, for every ``<object>`` element, collects
    the class name and the ``xmin``/``ymin``/``xmax``/``ymax`` values.

    Args:
        file_path: Directory containing the annotation XML files.
        img_xml_file: Annotation file name without the ``.xml`` extension; it
            is also passed to ``save_txt_file`` as the label file base name.

    Raises:
        IndexError: If a required element (``size``, ``width``, ``height``,
            ``name`` or one of the box corners) is missing.
        ValueError: If a box coordinate is not an integer literal.
    """
    dom = parse(file_path + '/' + img_xml_file + '.xml')
    root = dom.documentElement

    # Only width and height are needed. The filename and depth lookups were
    # dropped because their values were never used and they made otherwise
    # valid annotation files fail when those optional tags were absent.
    img_size = root.getElementsByTagName("size")[0]
    img_w = _tag_text(img_size, "width")
    img_h = _tag_text(img_size, "height")

    img_box = []
    for obj in root.getElementsByTagName("object"):
        cls_name = _tag_text(obj, "name")
        x1 = int(_tag_text(obj, "xmin"))
        y1 = int(_tag_text(obj, "ymin"))
        x2 = int(_tag_text(obj, "xmax"))
        y2 = int(_tag_text(obj, "ymax"))
        img_box.append([cls_name, x1, y1, x2, y2])

    # Written once per image, after all boxes have been collected.
    save_txt_file(img_xml_file, [img_w, img_h], img_box)
# Convert every annotation XML in the annotations directory to a label file.
ANNOTATIONS_DIR = './Safety_Helmet_Detection_datasets_xml/annotations'
files = os.listdir(ANNOTATIONS_DIR)
for file in files:
    # splitext keeps dots inside the base name intact (split(".")[0] would
    # truncate "img.v2.xml" to "img"), and the extension check skips stray
    # non-XML entries that would otherwise be handed to the XML parser.
    stem, ext = os.path.splitext(file)
    if ext.lower() != ".xml":
        continue
    print("file name: ", file)
    get_xml_data(ANNOTATIONS_DIR, stem)
0 0.9146634956123307 0.3497596284141764 0.11298077343963087 0.14182692836038768
0 0.05168269423302263 0.39663463016040623 0.08413461851887405 0.09134615724906325
0 0.63461540825665 0.3798077064566314 0.05288461735472083 0.09134615724906325
0 0.748798104817979 0.39182693767361343 0.05528846359811723 0.08653846476227045
0 0.305288472911343 0.39783655328210443 0.05288461735472083 0.06971154105849564
0 0.21634616190567613 0.39783655328210443 0.04807692486792803 0.06971154105849564
1 0.1742788526462391 0.3798077064566314 0.05048077111132443 0.06730769481509924
1 0.8016827221726999 0.383413475821726 0.05528846359811723 0.08894231100566685
1 0.44350963190663606 0.41105770762078464 0.04567307862453163 0.07211538730189204
1 0.5552884822245687 0.40024039952550083 0.043269232381135225 0.07451923354528844
1 0.5000000186264515 0.383413475821726 0.03846153989434242 0.06490384857170284
0 0.25240385555662215 0.3605769365094602 0.03365384740754962 0.04807692486792803
1 0.39903847640380263 0.39302886079531163 0.043269232381135225 0.06490384857170284
2. Split the dataset into train / val (and optionally test)
from sklearn.model_selection import train_test_split
import os
# os.listdir() returns entries in arbitrary order, so the listing is sorted
# first; otherwise the fixed random_state below would not reproduce the same
# split on another machine or file system.
image_list = sorted(os.listdir('./Safety_Helmet_Detection_datasets_xml/images/'))

# Optional three-way split (train / val / test), kept for reference:
# train_list, test_list = train_test_split(image_list, test_size=0.2, random_state=42)
# val_list, test_list = train_test_split(test_list, test_size=0.5, random_state=42)
# print('total =',len(image_list))
# print('train :',len(train_list))
# print('val :',len(val_list))
# print('test :',len(test_list))

# Two-way split: 80% train, 20% val.
train_list, val_list = train_test_split(image_list, test_size=0.2, random_state=42)
print('total =',len(image_list))
print('train :',len(train_list))
print('val :',len(val_list))
total = 5000
train : 4000
val : 1000
from sklearn.model_selection import train_test_split
import os
from pathlib import Path
from shutil import copyfile

# os.listdir() returns entries in arbitrary order, so the listing is sorted
# first; otherwise the fixed random_state below would not reproduce the same
# split on another machine or file system.
image_list = sorted(os.listdir('./Safety_Helmet_Detection_datasets_xml/images/'))

# Optional three-way split (train / val / test), kept for reference:
# train_list, test_list = train_test_split(image_list, test_size=0.2, random_state=42)
# val_list, test_list = train_test_split(test_list, test_size=0.5, random_state=42)
# print('total =',len(image_list))
# print('train :',len(train_list))
# print('val :',len(val_list))
# print('test :',len(test_list))

# Two-way split: 80% train, 20% val.
train_list, val_list = train_test_split(image_list, test_size=0.2, random_state=42)
print('total =',len(image_list))
print('train :',len(train_list))
print('val :',len(val_list))
def copy_data(file_list, img_labels_root, imgs_source, mode,
              dest_root='./Safety_Helmet_Detection_datasets_VOC'):
    """Copy images and their label files into ``<dest_root>/{images,labels}/<mode>``.

    For every entry in ``file_list`` the image ``<imgs_source>/<file>`` is
    copied to ``<dest_root>/images/<mode>/<file>`` and the label
    ``<img_labels_root>/<stem>.txt`` to ``<dest_root>/labels/<mode>/<stem>.txt``,
    where ``<stem>`` is the image file name without its final extension.

    Args:
        file_list: Image file names (with extension) to copy.
        img_labels_root: Directory holding the ``.txt`` label files.
        imgs_source: Directory holding the source images.
        mode: Name of the split sub-directory, e.g. ``"train"`` or ``"val"``.
        dest_root: Root of the output dataset. Defaults to the directory the
            script has always written to.

    Raises:
        FileNotFoundError: If an image or its label file does not exist.
    """
    dest = Path(dest_root)
    img_dest_dir = dest / 'images' / mode
    label_dest_dir = dest / 'labels' / mode

    # Create missing output directories instead of only reporting them;
    # copyfile() would otherwise fail on the very first file.
    for target_dir in (img_dest_dir, label_dest_dir):
        if not target_dir.exists():
            print(f"Path {target_dir} does not exist, creating it")
            target_dir.mkdir(parents=True, exist_ok=True)

    for file in file_list:
        # Use the real file name and its stem rather than assuming ".png",
        # so images with other extensions are copied as well.
        label_name = Path(file).stem + '.txt'
        copyfile(Path(imgs_source) / file, img_dest_dir / file)
        copyfile(Path(img_labels_root) / label_name, label_dest_dir / label_name)
# Populate the train and val folders from the converted labels and source images.
for split_mode, split_files in (("train", train_list), ("val", val_list)):
    copy_data(
        split_files,
        './Safety_Helmet_Detection_datasets_xml/labels',
        './Safety_Helmet_Detection_datasets_xml/images',
        split_mode,
    )
python train.py --img 416 --batch 32 --epochs 300 --data data/helmet.yaml --cfg models/yolov5s.yaml --weights yolov5s.pt
- Using the rate package for data mining
- The idea of implementing charts chart view in all swiftui versions (1.0-4.0) was born
- Leetcode 23. 合并K个升序链表
- MYCAT middleware installation and use
- Blog stop statement
- 促进OKR落地的工作总结该如何写?
- It's healthy to drink medicinal wine like this. Are you drinking it right
- Comparison between applet framework and platform compilation
- 【Go基础】1 - Go Go Go
- zabbix 5.0监控客户端
Ecole bio rushes to the scientific innovation board: the annual revenue is 330million. Honghui fund and Temasek are shareholders
1. Getting started with QT
User login function: simple but difficult
Google's official response: we have not given up tensorflow and will develop side by side with Jax in the future
How to use MOS tube to realize the anti reverse connection circuit of power supply
Zephyr learning notes 1, threads
ZABBIX monitoring system custom monitoring content
Zephyr 学习笔记1,threads
Sqli labs download, installation and reset of SQL injection test tool one of the solutions to the database error (# 0{main}throw in d:\software\phpstudy_pro\www\sqli labs-...)
弈柯莱生物冲刺科创板:年营收3.3亿 弘晖基金与淡马锡是股东
How to get bytes containing null terminators from a string- c#
【Go基础】2 - Go基本语句
University stage summary
Unity opens the explorer from the inspector interface, selects and records the file path
Linear algebra 1.1
With excellent strength, wangchain technology, together with IBM and Huawei, has entered the annual contribution list of "super ledger"!
The idea of implementing charts chart view in all swiftui versions (1.0-4.0) was born
PCIe knowledge points -010: where to get PCIe hot plug data
Moher College phpmailer remote command execution vulnerability tracing
ZABBIX 5.0 monitoring client
This article is enough for learning advanced mysql
Easy to understand: understand the time series database incluxdb
Zephyr 学习笔记1,threads
Système de surveillance zabbix contenu de surveillance personnalisé
R language uses cforest function in Party package to build random forest based on conditional inference trees, uses varimp function to check feature importance, and uses table function to calculate co
1. Qt入门