当前位置:网站首页>Visual yolov5 format data set (labelme JSON file)
Visualizing a YOLOv5-format dataset (converting it to labelme JSON files)
2022-07-03 01:54:00 【athrunsunny】
In my own projects the dataset is often small, but annotated data is available online. Those annotations usually differ somewhat from what my project requires. I don't want to re-label everything; I only want to fine-tune the existing labels. However, the native YOLOv5 label format is not intuitive to edit by hand. In that case you can convert the YOLOv5-format data into labelme's JSON format. This makes it easy to fine-tune the annotations without spending a lot of effort labelling a large amount of data from scratch, which reduces labor costs.
# -*- coding: utf-8 -*-
"""
Time: 2021.10.26
Author: Athrunsunny
Version: V 0.1
File: yolotolabelme.py
Describe: Functions in this file convert a YOLOv5-format dataset to labelme JSON files
"""
import base64
import io
import os
import numpy as np
import json
from glob import glob
import cv2
import shutil
import yaml
from tqdm import tqdm
import PIL.Image
# Default dataset root for the functions in this file: the working directory
# the script is started from.
ROOT_DIR = os.getcwd()
VERSION = '4.5.7' # labelme version recorded in the "version" field of each generated JSON file
def img_arr_to_b64(img_arr):
    """Serialise an image array as PNG and return the base64 encoding.

    Args:
        img_arr: Image array accepted by ``PIL.Image.fromarray``.

    Returns:
        The base64-encoded PNG data as ``bytes``.
    """
    with io.BytesIO() as buffer:
        PIL.Image.fromarray(img_arr).save(buffer, format="PNG")
        png_bytes = buffer.getvalue()
    # Prefer ``encodebytes``; fall back to the legacy ``encodestring`` name
    # only on interpreters that do not provide it.
    encoder = getattr(base64, "encodebytes", None)
    if encoder is None:
        encoder = base64.encodestring
    return encoder(png_bytes)
def process_point(points, cls):
    """Build the labelme ``shapes`` entries for the boxes of one image.

    Args:
        points: Iterable of rows ``(class_index, x1, y1, x2, y2)``. Entries
            that are ``None`` are skipped.
        cls: Container mapping an integer class index to its label name.

    Returns:
        A list with one dict per row, holding the keys ``label``, ``points``,
        ``group_id``, ``shape_type`` and ``flags``.
    """
    info = []
    for point in points:
        # The None check must precede any indexing; a missing row carries no
        # class index, so no shape can be built from it.
        if point is None:
            continue
        info.append({
            'label': cls[int(point[0])],
            # Plain floats keep the result JSON-serialisable regardless of
            # the numeric type of the incoming rows.
            'points': [[float(point[1]), float(point[2])],
                       [float(point[3]), float(point[4])]],
            'group_id': None,
            'shape_type': 'rectangle',
            'flags': {},
        })
    return info
def create_json(img, imagePath, filename, info):
    """Write one labelme JSON annotation file.

    Args:
        img: Image array; its first two ``shape`` entries are stored as
            ``imageHeight`` and ``imageWidth`` and its pixels are embedded
            as base64 PNG data in ``imageData``.
        imagePath: Value stored verbatim in the ``imagePath`` field.
        filename: Path of the JSON file to create (overwritten if present).
        info: List of shape dicts stored in the ``shapes`` field.
    """
    height, width = img.shape[:2]
    data = {
        'version': VERSION,
        'flags': {},
        'shapes': info,
        'imagePath': imagePath,
        'imageData': img_arr_to_b64(img).decode('utf-8'),
        'imageHeight': height,
        'imageWidth': width,
    }
    jsondata = json.dumps(data, indent=4, separators=(',', ': '))
    # The context manager closes the handle even if the write fails.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(jsondata)
def read_txt(path):
    """Read a whitespace-separated numeric label file into a 2-D array.

    Each non-blank line becomes one row of ``np.float64`` values. Blank lines
    and repeated whitespace between fields are tolerated.

    Args:
        path: Path of the label ``.txt`` file.

    Returns:
        A 2-D ``numpy`` array with one row per non-blank line. A file with no
        data lines yields an empty array of shape ``(0, 5)`` so callers can
        still slice columns.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If a field cannot be parsed as a number.
    """
    # Explicit check instead of ``assert``, which is stripped under ``-O``.
    if not os.path.exists(path):
        raise FileNotFoundError("label file not found: {}".format(path))
    rows = []
    with open(path, mode='r', encoding="utf-8") as f:
        for line in f:
            # split() without an argument collapses runs of whitespace and
            # drops the trailing newline.
            fields = line.split()
            if not fields:
                continue
            rows.append([np.float64(value) for value in fields])
    if not rows:
        return np.zeros((0, 5), dtype=np.float64)
    return np.array(rows)
def load_dataset_info(path=ROOT_DIR):
    """Load the dataset description from a ``*.yaml`` file in ``path``.

    Args:
        path: Directory searched (non-recursively) for ``*.yaml`` files.

    Returns:
        The parsed YAML content.

    Raises:
        FileNotFoundError: If ``path`` contains no ``*.yaml`` file.
    """
    # os.path.join keeps the pattern portable; sorting makes the choice
    # deterministic when several YAML files are present.
    candidates = sorted(glob(os.path.join(path, "*.yaml")))
    if not candidates:
        raise FileNotFoundError(
            "no *.yaml dataset description found in {}".format(path))
    with open(candidates[0], "r", encoding="utf-8") as f:
        # NOTE(review): FullLoader can construct more than plain data types;
        # switch to yaml.safe_load if the YAML may come from an untrusted
        # source.
        data = yaml.load(f, Loader=yaml.FullLoader)
    return data
def reconvert_list(size, box):
    """Convert one normalised centre/extent box to pixel corner coordinates.

    Args:
        size: ``(width, height)`` used to scale the box.
        box: ``(x_center, y_center, width, height)`` relative to ``size``.

    Returns:
        Tuple ``(x1, y1, x2, y2)`` of the two opposite corners. The scaled
        centre is shifted by one pixel on each axis before the half-extent
        is applied.
    """
    inv_w = 1. / (size[0])
    inv_h = 1. / (size[1])
    centre_x, centre_y = box[0] / inv_w, box[1] / inv_h
    extent_x, extent_y = box[2] / inv_w, box[3] / inv_h

    def _edges(centre, extent):
        # Low and high edge along one axis.
        doubled = (centre + 1) * 2
        return (doubled - extent) / 2., (doubled + extent) / 2.

    x1, x2 = _edges(centre_x, extent_x)
    y1, y2 = _edges(centre_y, extent_y)
    return x1, y1, x2, y2
def reconvert_np(size, box):
    """Convert normalised centre/extent boxes to pixel corners, in place.

    Args:
        size: ``(width, height)`` used to scale the boxes.
        box: 2-D array whose first four columns hold
            ``x_center, y_center, width, height`` relative to ``size``.

    Returns:
        The same ``box`` array, with its first four columns overwritten by
        ``x1, y1, x2, y2``. The scaled centre is shifted by one pixel on each
        axis before the half-extent is applied.
    """
    inv_w = 1. / (size[0])
    inv_h = 1. / (size[1])
    # Division creates fresh arrays, so the writes below cannot disturb the
    # values still needed for the other columns.
    centre_x = box[:, 0:1] / inv_w
    centre_y = box[:, 1:2] / inv_h
    extent_x = box[:, 2:3] / inv_w
    extent_y = box[:, 3:4] / inv_h
    doubled_x = (centre_x + 1) * 2
    doubled_y = (centre_y + 1) * 2
    box[:, 0:1] = (doubled_x - extent_x) / 2.
    box[:, 1:2] = (doubled_y - extent_y) / 2.
    box[:, 2:3] = (doubled_x + extent_x) / 2.
    box[:, 3:4] = (doubled_y + extent_y) / 2.
    return box
def txt2json(proctype, cls, path=ROOT_DIR):
    """Convert one dataset split from YOLO txt labels to labelme JSON files.

    Images are read from ``<path>/<proctype>/images`` and labels from
    ``<path>/<proctype>/labels``. For every readable image a JSON file and a
    copy of the image are written to ``<ROOT_DIR>/createjson/<proctype>``.

    Args:
        proctype: Name of the split sub-directory (e.g. ``'train'``).
        cls: Container mapping an integer class index to its label name.
        path: Dataset root directory.
    """
    process_image_path = os.path.join(path, proctype, 'images')
    process_label_path = os.path.join(path, proctype, 'labels')
    # Compare extensions case-insensitively so each file is listed exactly
    # once on every platform.
    externs = {'.png', '.jpg', '.jpeg', '.bmp'}
    imgfiles = []
    if os.path.isdir(process_image_path):
        imgfiles = sorted(
            os.path.join(process_image_path, name)
            for name in os.listdir(process_image_path)
            if os.path.splitext(name)[1].lower() in externs
        )
    createfile = os.path.join(ROOT_DIR, 'createjson', proctype)
    os.makedirs(createfile, exist_ok=True)
    for image_path in tqdm(imgfiles):
        frame = cv2.imread(image_path)
        if frame is None:
            # cv2.imread signals an unreadable file by returning None.
            tqdm.write("Skipping unreadable image {}".format(image_path))
            continue
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        height, width = frame.shape[:2]
        size = (width, height)
        imgfilename = os.path.basename(image_path)
        imgname = os.path.splitext(imgfilename)[0]
        jsonpath = os.path.join(createfile, imgname + '.json')
        txtpath = os.path.join(process_label_path, imgname + '.txt')
        # An image without a label file, or with an empty one, has no boxes.
        info = []
        if os.path.exists(txtpath):
            label_and_point = read_txt(txtpath)
            if label_and_point.size:
                label_and_point[:, 1:] = reconvert_np(
                    size, label_and_point[:, 1:])
                info = process_point(label_and_point, cls)
        # The image is copied next to the JSON file, so imagePath is the
        # file name including its extension.
        create_json(frame, imgfilename, jsonpath, info)
        shutil.copy(image_path, createfile)
def yolotolabelme(path=ROOT_DIR):
    """Convert every available split of a YOLO dataset to labelme JSON.

    The splits ``train``, ``valid`` and ``test`` are processed when an entry
    of that name exists in ``path``. Class names are taken from the
    ``names`` key of the dataset YAML found in ``path``.

    Args:
        path: Dataset root directory.
    """
    entries = set(os.listdir(path))
    pathtype = [split for split in ('train', 'valid', 'test')
                if split in entries]
    # Forward ``path`` so a non-default root is used for both the YAML lookup
    # and the conversion.
    cls = load_dataset_info(path)['names']
    for file_type in pathtype:
        print("Processing image type {} \n".format(file_type))
        txt2json(file_type, cls, path)


if __name__ == "__main__":
    yolotolabelme()
Save the code above as yolotolabelme.py in the root directory of the dataset.

Before running the program, install the libraries that the code above imports, and then run it.

After it runs, a createjson folder is generated.

The converted data is generated under the createjson folder, in a train or valid subfolder matching the source split. After that, you can open it with labelme.

Because my test dataset is empty, it is also empty after conversion. Open the train folder with labelme and you can see the corresponding annotations on the files under that path.

边栏推荐
- Smart management of Green Cities: Digital twin underground integrated pipe gallery platform
- STM32 - switch of relay control lamp
- Network security - phishing
- View of MySQL
- DQL basic operation
- Smart management of Green Cities: Digital twin underground integrated pipe gallery platform
- ByteDance data Lake integration practice based on Hudi
- 网络安全-ACL访问控制列表
- Network security - Trojan horse
- PS remove watermark details
猜你喜欢

Learn BeanShell before you dare to say you know JMeter

STM32 - vibration sensor control relay on
![[leetcode] 797 and 1189 (basis of graph theory)](/img/2a/9c0a904151a17c2d23dea9ad04dbfe.jpg)
[leetcode] 797 and 1189 (basis of graph theory)

【数据挖掘】任务2:医学数据库MIMIC-III数据处理

The testing process that software testers should know

深度(穿透)选择器 ::v-deep/deep/及 > > >

Bottleneck period must see: how can testers who have worked for 3-5 years avoid detours and break through smoothly

自定义组件、使用npm包、全局数据共享、分包

【数据挖掘】任务1:距离计算

全链路数字化转型下,零售企业如何打开第二增长曲线
随机推荐
疫情當頭,作為Leader如何進行團隊的管理?| 社區征文
网络安全-ACL访问控制列表
Vant implements a simple login registration module and a personal user center
[technology development-23]: application of DSP in future converged networks
ByteDance data Lake integration practice based on Hudi
Everything file search tool
[shutter] animation animation (animatedwidget animation use process | create animation controller | create animation | create animatedwidget animation component | animation operation)
High resolution network (Part 1): Principle Analysis
Caused by: com.fasterxml.jackson.databind.exc.MismatchedInputException: Cannot construct instance o
小程序开发黑马购物商城中遇到的问题
String splicing function of MySQL
Smart management of Green Cities: Digital twin underground integrated pipe gallery platform
网络安全-木马
网络安全-openvas
网络安全-破解系统密码
Niuniu's ball guessing game (dynamic planning + prefix influence)
PS remove watermark details
STM32 - Application of external interrupt induction lamp
【数据挖掘】任务4:20Newsgroups聚类
DML Foundation