当前位置:网站首页>Visual yolov5 format data set (labelme JSON file)
Visualizing a YOLOv5-format dataset (converting it to labelme JSON files)
2022-07-03 01:54:00 【athrunsunny】
In my own projects the dataset is often small. There is annotated data available online, but it usually differs somewhat from the labeling requirements of my project. I don't want to re-label everything, only fine-tune the existing annotations; however, YOLOv5's native label format is not intuitive to edit. In that case you can convert the YOLOv5-format data into labelme's JSON format. This makes it easy to fine-tune the annotations, and there is no need to spend a lot of effort labeling large amounts of data, which reduces labor costs.
# -*- coding: utf-8 -*-
"""
Time: 2021.10.26
Author: Athrunsunny
Version: V 0.1
File: yolotolabelme.py
Describe: The functions in this file convert a YOLO-format dataset into labelme JSON files
"""
import base64
import io
import os
import numpy as np
import json
from glob import glob
import cv2
import shutil
import yaml
from tqdm import tqdm
import PIL.Image
# Default dataset root: the working directory at the time the module is loaded.
ROOT_DIR = os.getcwd()
VERSION = '4.5.7' # written to the "version" field of each generated JSON; set it to your labelme version
def img_arr_to_b64(img_arr):
    """Encode an image array as base64 text of its PNG representation.

    Args:
        img_arr: Image array accepted by ``PIL.Image.fromarray``.

    Returns:
        The base64-encoded PNG data as ``bytes``.
    """
    buffer = io.BytesIO()
    PIL.Image.fromarray(img_arr).save(buffer, format="PNG")
    png_bytes = buffer.getvalue()
    # Prefer ``encodebytes``; ``encodestring`` is only looked up when the
    # running interpreter does not provide the newer name.
    encoder = (base64.encodebytes if hasattr(base64, "encodebytes")
               else base64.encodestring)
    return encoder(png_bytes)
def process_point(points, cls):
    """Build the labelme ``shapes`` list from converted annotation rows.

    Args:
        points: Iterable of rows ``[class_id, x1, y1, x2, y2]``. ``None``
            entries are skipped because they carry no class id to label.
        cls: Class-name lookup indexed by the integer class id (a list, or
            a dict keyed by int).

    Returns:
        A list of dicts, one per row, each describing a labelme rectangle
        with the keys ``label``, ``points``, ``group_id``, ``shape_type``
        and ``flags``.
    """
    info = []
    for point in points:
        # The None check must come before any indexing; previously
        # ``point[0]`` was read first, so a None row raised TypeError.
        if point is None:
            continue
        info.append({
            'label': cls[int(point[0])],
            # Plain floats keep the result JSON-serialisable regardless of
            # the numeric type of the incoming row.
            'points': [[float(point[1]), float(point[2])],
                       [float(point[3]), float(point[4])]],
            'group_id': None,
            'shape_type': 'rectangle',
            'flags': {},
        })
    return info
def create_json(img, imagePath, filename, info):
    """Write one labelme-style JSON annotation file.

    Args:
        img: Image array; its first two ``shape`` entries are stored as
            ``imageHeight`` / ``imageWidth`` and its PNG encoding is
            embedded as ``imageData``.
        imagePath: Value stored in the ``imagePath`` field.
        filename: Path of the JSON file to create (overwritten if present).
        info: List of shape dicts stored in the ``shapes`` field.
    """
    height, width = img.shape[:2]
    data = {
        'version': VERSION,
        'flags': {},
        'shapes': info,
        'imagePath': imagePath,
        'imageData': img_arr_to_b64(img).decode('utf-8'),
        'imageHeight': height,
        'imageWidth': width,
    }
    # Serialise before opening the file so that a serialisation error does
    # not leave an empty or truncated JSON file behind.
    jsondata = json.dumps(data, indent=4, separators=(',', ': '))
    # The context manager closes the handle even if the write fails.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(jsondata)
def read_txt(path):
    """Read a whitespace-separated numeric label file into a 2-D array.

    Blank lines are ignored and fields may be separated by any run of
    whitespace, so trailing newlines or double spaces do not break parsing.

    Args:
        path: Path of the label text file.

    Returns:
        ``numpy.ndarray`` of ``float64`` with one row per non-blank line.
        A file without any rows yields an empty array of shape ``(0, 5)``
        (five columns: class id plus four box values) so that callers can
        still slice it by column.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If a field cannot be parsed as a number.
    """
    # Explicit raise instead of ``assert``, which is stripped under ``-O``.
    if not os.path.exists(path):
        raise FileNotFoundError("label file not found: {}".format(path))
    rows = []
    with open(path, mode='r', encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            rows.append([float(value) for value in fields])
    if not rows:
        return np.empty((0, 5), dtype=np.float64)
    return np.array(rows, dtype=np.float64)
def load_dataset_info(path=ROOT_DIR):
    """Load the dataset description YAML found directly inside ``path``.

    Args:
        path: Directory searched for ``*.yaml`` files.

    Returns:
        The parsed YAML content.

    Raises:
        FileNotFoundError: If ``path`` contains no ``*.yaml`` file.
    """
    # os.path.join keeps the pattern portable; the previous hard-coded
    # backslash only worked on Windows. Sorting makes the choice
    # deterministic when several YAML files are present.
    candidates = sorted(glob(os.path.join(path, "*.yaml")))
    if not candidates:
        raise FileNotFoundError(
            "no *.yaml dataset description found in {}".format(path))
    with open(candidates[0], "r", encoding="utf-8") as f:
        # NOTE(review): FullLoader is not meant for untrusted input; switch
        # to yaml.safe_load if the YAML can come from an untrusted source.
        data = yaml.load(f, Loader=yaml.FullLoader)
    return data
def reconvert_list(size, box):
    """Turn one normalised centre/size box into corner coordinates.

    Args:
        size: Pair whose first entry scales the x/width fields and whose
            second entry scales the y/height fields.
        box: Sequence ``(x_center, y_center, width, height)`` in
            normalised units.

    Returns:
        Tuple ``(x1, y1, x2, y2)``. The scaled centre is shifted by one
        before the half extents are applied.
    """
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]
    # Scale every field up by dividing by the reciprocal of the image size.
    center_x, center_y = box[0] / inv_w, box[1] / inv_h
    half_w, half_h = (box[2] / inv_w) / 2., (box[3] / inv_h) / 2.
    shifted_x = center_x + 1
    shifted_y = center_y + 1
    return (shifted_x - half_w, shifted_y - half_h,
            shifted_x + half_w, shifted_y + half_h)
def reconvert_np(size, box):
    """Convert an array of normalised centre/size boxes to corners in place.

    Args:
        size: Pair whose first entry scales the x/width columns and whose
            second entry scales the y/height columns.
        box: 2-D array with columns ``x_center, y_center, width, height``.
            It is overwritten with ``x1, y1, x2, y2``.

    Returns:
        The same ``box`` array that was passed in, now holding corners.
    """
    inv_w = 1. / size[0]
    inv_h = 1. / size[1]
    # Every scaled column is computed into a fresh array first, so the
    # in-place writes below cannot disturb values that are still needed.
    center_x = box[:, 0:1] / inv_w
    center_y = box[:, 1:2] / inv_h
    half_w = (box[:, 2:3] / inv_w) / 2.
    half_h = (box[:, 3:4] / inv_h) / 2.
    shifted_x = center_x + 1
    shifted_y = center_y + 1
    box[:, 0:1] = shifted_x - half_w
    box[:, 1:2] = shifted_y - half_h
    box[:, 2:3] = shifted_x + half_w
    box[:, 3:4] = shifted_y + half_h
    return box
def txt2json(proctype, cls, path=ROOT_DIR):
    """Convert one dataset split from YOLO txt labels to labelme JSON files.

    Images are read from ``<path>/<proctype>/images`` and labels from
    ``<path>/<proctype>/labels``. For every readable image a JSON file is
    written to ``<ROOT_DIR>/createjson/<proctype>`` and the image itself is
    copied next to it.

    Args:
        proctype: Name of the split sub-directory (e.g. ``'train'``).
        cls: Class-name lookup indexed by integer class id.
        path: Dataset root containing the split directory.
    """
    process_image_path = os.path.join(path, proctype, 'images')
    process_label_path = os.path.join(path, proctype, 'labels')
    externs = ['png', 'jpg', 'JPEG', 'BMP', 'bmp']
    imgfiles = []
    for extern in externs:
        # os.path.join instead of a hard-coded backslash keeps this portable.
        imgfiles.extend(glob(os.path.join(process_image_path, "*." + extern)))
    # 'BMP' and 'bmp' can match the same file where glob matching ignores
    # case (e.g. on Windows); drop duplicates but keep the order.
    imgfiles = list(dict.fromkeys(imgfiles))

    # NOTE: the output goes below ROOT_DIR, not ``path``; kept as is so the
    # output location does not change for existing callers.
    createfile = os.path.join(ROOT_DIR, 'createjson', proctype)
    os.makedirs(createfile, exist_ok=True)

    for image_path in tqdm(imgfiles):
        frame = cv2.imread(image_path)
        # cv2.imread signals failure by returning None, not by raising.
        if frame is None:
            tqdm.write("Skipping unreadable image {}".format(image_path))
            continue
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        height, width = frame.shape[:2]
        size = (width, height)

        imgfilename = os.path.basename(image_path)
        imgname = imgfilename.rsplit('.', 1)[0]
        jsonpath = os.path.join(createfile, imgname + '.json')
        txtpath = os.path.join(process_label_path, imgname + '.txt')

        # An image without a label file is treated as having no boxes and
        # still gets a JSON file with an empty shape list.
        if os.path.exists(txtpath):
            label_and_point = read_txt(txtpath)
        else:
            label_and_point = np.empty((0, 5))
        # An empty label file has no columns to slice; skip the conversion.
        if label_and_point.size:
            label_and_point[:, 1:] = reconvert_np(size, label_and_point[:, 1:])
        info = process_point(label_and_point, cls)

        # imagePath must name the copied image file, extension included,
        # because the image is copied into the same folder as the JSON.
        create_json(frame, imgfilename, jsonpath, info)
        shutil.copy(image_path, createfile)
def yolotolabelme(path=ROOT_DIR):
    """Convert every split found under ``path`` to labelme JSON files.

    The splits ``train``, ``valid`` and ``test`` are processed, in that
    order, when a directory entry of that name exists in ``path``. Class
    names come from the ``names`` entry of the dataset YAML in ``path``.

    Args:
        path: Dataset root directory.
    """
    entries = set(os.listdir(path))
    pathtype = [name for name in ('train', 'valid', 'test') if name in entries]
    # Forward ``path`` to the helpers; previously they were called without
    # it, so a non-default ``path`` was inspected here but then ignored.
    cls = load_dataset_info(path)['names']
    for file_type in pathtype:
        print("Processing image type {} \n".format(file_type))
        txt2json(file_type, cls, path)


if __name__ == "__main__":
    yolotolabelme()
Save the code above as yolotolabelme.py and store it in the root directory of the dataset.
Before running the program, install the libraries imported at the top of the script, then run it.
After it runs, a createjson folder is generated.
The converted data is written under the createjson folder, in a subfolder for each split (train or valid). After that, you can open it with labelme.
Because my test dataset is empty, its converted output is empty as well. Open the files under the train path with labelme to see the corresponding annotations.
边栏推荐
- [data mining] task 4:20newsgroups clustering
- STM32 - vibration sensor control relay on
- His experience in choosing a startup company or a big Internet company may give you some inspiration
- [data mining] task 5: k-means/dbscan clustering: double square
- [camera topic] complete analysis of camera dtsi
- [data mining] task 1: distance calculation
- DML Foundation
- 小程序開發的部分功能
- 疫情當頭,作為Leader如何進行團隊的管理?| 社區征文
- How to refresh the opening amount of Oracle ERP
猜你喜欢
[data mining] task 1: distance calculation
Technology sharing | Frida's powerful ability to realize hook functions
全链路数字化转型下,零售企业如何打开第二增长曲线
【数据挖掘】任务2:医学数据库MIMIC-III数据处理
Introduction to kotlin collaboration
[data mining] task 6: DBSCAN clustering
Smart management of Green Cities: Digital twin underground integrated pipe gallery platform
深度(穿透)选择器 ::v-deep/deep/及 > > >
NCTF 2018 part Title WP (1)
ByteDance data Lake integration practice based on Hudi
随机推荐
[camera topic] how to save OTP data in user-defined nodes
小程序开发的部分功能
word插入公式/endnote
VIM 9.0 is officially released! The execution speed of the new script can be increased by up to 100 times
Custom components, using NPM packages, global data sharing, subcontracting
网络安全-最简单的病毒
网络安全-DNS欺骗与钓鱼网站
His experience in choosing a startup company or a big Internet company may give you some inspiration
Why can't the start method be called repeatedly? But the run method can?
Modify table structure
网络安全-扫描
Network security - man in the middle attack
技术大佬准备就绪,话题C位由你决定
深度(穿透)选择器 ::v-deep/deep/及 > > >
Sweet talk generator, regular greeting email machine... Open source programmers pay too much for this Valentine's day
Problems encountered in small program development of dark horse shopping mall
Introduction to flask tutorial
Return the only different value (de duplication)
[understanding of opportunity -36]: Guiguzi - flying clamp chapter - prevention against killing and bait
网络安全-ACL访问控制列表