当前位置:网站首页>Visual yolov5 format data set (labelme JSON file)
Visualizing a YOLOv5-format dataset (converting it to labelme JSON files)
2022-07-03 01:54:00 【athrunsunny】
In my own projects the datasets are often small. There is labeled data available online, but its annotations usually differ somewhat from the labeling requirements of my project. I don't want to re-label everything, only fine-tune the existing annotations; however, YOLOv5's native label format is not intuitive to edit. In that case you can convert the YOLOv5-format data into labelme's JSON format. This makes it easy to fine-tune the annotations without spending a lot of effort labeling a large dataset, which reduces labor costs.
# -*- coding: utf-8 -*-
"""
Time: 2021.10.26
Author: Athrunsunny
Version: V 0.1
File: yolotolabelme.py
Describe: Functions in this file convert a YOLOv5-format dataset into labelme JSON files
"""
import base64
import io
import os
import numpy as np
import json
from glob import glob
import cv2
import shutil
import yaml
from tqdm import tqdm
import PIL.Image
# Directory the script is launched from. It is the default dataset root for
# the functions below and the parent of the generated "createjson" folder.
ROOT_DIR = os.getcwd()
VERSION = '4.5.7' # labelme version string written into each generated JSON file
def img_arr_to_b64(img_arr):
    """Encode an image array as base64 text of its PNG representation.

    Args:
        img_arr: Image array accepted by ``PIL.Image.fromarray``.

    Returns:
        The base64-encoded PNG data as ``bytes``.
    """
    buffer = io.BytesIO()
    PIL.Image.fromarray(img_arr).save(buffer, format="PNG")
    png_bytes = buffer.getvalue()
    # Prefer ``encodebytes``; fall back to the legacy ``encodestring`` name
    # only on interpreters that do not provide it.
    encoder = getattr(base64, "encodebytes", None)
    if encoder is None:
        encoder = base64.encodestring
    return encoder(png_bytes)
def process_point(points, cls):
    """Build labelme rectangle shapes from converted label rows.

    Args:
        points: Iterable of rows ``(class_index, x1, y1, x2, y2)``. Rows that
            are ``None`` are skipped.
        cls: Class names, indexable by the integer class index (a list or an
            int-keyed dict both work).

    Returns:
        A list with one dict per usable row, holding the keys ``label``,
        ``points``, ``group_id``, ``shape_type`` and ``flags``.
    """
    info = []
    for point in points:
        # The None check must come before any indexing: previously the label
        # lookup ran first, so a None row raised TypeError and the fallback
        # branch could never execute. A row without data has no class index,
        # so there is nothing meaningful to emit for it.
        if point is None:
            continue
        info.append({
            'label': cls[int(point[0])],
            # Plain floats keep the structure JSON-serialisable regardless of
            # the numeric type of the incoming rows.
            'points': [[float(point[1]), float(point[2])],
                       [float(point[3]), float(point[4])]],
            'group_id': None,
            'shape_type': 'rectangle',
            'flags': {},
        })
    return info
def create_json(img, imagePath, filename, info):
    """Write a labelme-style annotation JSON file for one image.

    Args:
        img: Image array; its first two dimensions are stored as
            ``imageHeight`` and ``imageWidth`` and its pixels are embedded as
            base64 PNG data.
        imagePath: Value stored under the ``imagePath`` key.
        filename: Path of the JSON file to write (overwritten if it exists).
        info: List of shape dicts stored under the ``shapes`` key.
    """
    height, width = img.shape[:2]
    data = {
        'version': VERSION,
        'flags': {},
        'shapes': info,
        'imagePath': imagePath,
        'imageData': img_arr_to_b64(img).decode('utf-8'),
        'imageHeight': height,
        'imageWidth': width,
    }
    jsondata = json.dumps(data, indent=4, separators=(',', ': '))
    # A context manager guarantees the handle is closed even if the write
    # fails; the explicit encoding avoids depending on the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(jsondata)
def read_txt(path):
    """Read a whitespace-separated numeric label file into a 2-D array.

    Each non-blank line becomes one row of ``float64`` values. Fields may be
    separated by any run of whitespace, and blank lines are ignored.

    Args:
        path: Path of the label text file.

    Returns:
        A 2-D ``numpy`` array with one row per non-blank line. A file without
        any data yields an empty array of shape ``(0, 5)`` so that callers can
        still slice columns.
        NOTE(review): the width 5 assumes the ``class x y w h`` layout the
        rest of this script indexes -- confirm if other layouts are used.

    Raises:
        FileNotFoundError: If ``path`` does not exist (raised by ``open``;
            this replaces the former ``assert``, which is stripped under
            ``python -O``).
        ValueError: If a field cannot be parsed as a number.
    """
    rows = []
    with open(path, mode='r', encoding="utf-8") as f:
        for line in f:
            # ``split()`` with no argument collapses repeated whitespace and
            # drops the trailing newline, so blank lines give an empty list.
            fields = line.split()
            if not fields:
                continue
            rows.append([np.float64(value) for value in fields])
    if not rows:
        return np.empty((0, 5), dtype=np.float64)
    return np.array(rows)
def load_dataset_info(path=ROOT_DIR):
    """Load the dataset description from the first ``*.yaml`` file in a folder.

    Args:
        path: Directory searched (non-recursively) for ``*.yaml`` files.

    Returns:
        The parsed YAML document.

    Raises:
        FileNotFoundError: If ``path`` contains no ``*.yaml`` file.
    """
    # os.path.join keeps the pattern portable; the previous hard-coded "\\"
    # separator only matched on Windows. Sorting makes the choice
    # deterministic when several YAML files are present.
    candidates = sorted(glob(os.path.join(path, "*.yaml")))
    if not candidates:
        raise FileNotFoundError(
            "No *.yaml dataset description found in {}".format(path))
    with open(candidates[0], "r", encoding="utf-8") as f:
        # NOTE(review): FullLoader can construct more than plain data types;
        # if the YAML may come from an untrusted source, consider switching
        # to yaml.safe_load.
        data = yaml.load(f, Loader=yaml.FullLoader)
    return data
def reconvert_list(size, box, offset=1.):
    """Convert one normalised centre/size box into pixel corner coordinates.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(x_center, y_center, box_width, box_height)`` expressed as
            fractions of the image size.
        offset: Value added to the pixel centre on both axes. The default of
            1 reproduces the previous hard-coded behaviour (presumably the
            inverse of a forward conversion that subtracted 1 -- confirm
            against how the labels were produced). Pass 0 for labels written
            without such a shift.

    Returns:
        A tuple ``(x1, y1, x2, y2)`` with the top-left and bottom-right
        corners in pixels.
    """
    width, height = size[0], size[1]
    # Scale by multiplying directly instead of dividing by a reciprocal,
    # which avoids an extra floating-point rounding step.
    center_x = box[0] * width + offset
    center_y = box[1] * height + offset
    half_w = box[2] * width / 2.
    half_h = box[3] * height / 2.
    return (center_x - half_w, center_y - half_h,
            center_x + half_w, center_y + half_h)
def reconvert_np(size, box, offset=1.):
    """Convert normalised centre/size boxes to pixel corners, in place.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: 2-D array whose first four columns are ``x_center, y_center,
            box_width, box_height`` as fractions of the image size. The array
            is overwritten with the result.
        offset: Value added to the pixel centre on both axes. The default of
            1 reproduces the previous hard-coded behaviour (presumably the
            inverse of a forward conversion that subtracted 1 -- confirm
            against how the labels were produced). Pass 0 for labels written
            without such a shift.

    Returns:
        The same ``box`` array, whose first four columns now hold
        ``x1, y1, x2, y2`` in pixels.
    """
    width, height = size[0], size[1]
    # The arithmetic below creates new arrays, so every input column is read
    # before any column of ``box`` is overwritten.
    center_x = box[:, 0] * width + offset
    center_y = box[:, 1] * height + offset
    half_w = box[:, 2] * width / 2.
    half_h = box[:, 3] * height / 2.
    box[:, 0] = center_x - half_w
    box[:, 1] = center_y - half_h
    box[:, 2] = center_x + half_w
    box[:, 3] = center_y + half_h
    return box
def _list_images(directory, extensions=('png', 'jpg', 'jpeg', 'bmp')):
    """Return the sorted image paths in *directory*, matching extensions case-insensitively."""
    if not os.path.isdir(directory):
        return []
    wanted = {'.' + ext.lower() for ext in extensions}
    return sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if os.path.splitext(name)[1].lower() in wanted
    )


def txt2json(proctype, cls, path=ROOT_DIR):
    """Convert one dataset split from YOLO txt labels to labelme JSON files.

    Images are read from ``<path>/<proctype>/images`` and labels from
    ``<path>/<proctype>/labels``. For every readable image a JSON file is
    written to ``<ROOT_DIR>/createjson/<proctype>`` and the image is copied
    next to it.

    Args:
        proctype: Name of the split sub-folder (for example ``'train'``).
        cls: Class names, indexable by integer class index.
        path: Dataset root directory.
    """
    process_image_path = os.path.join(path, proctype, 'images')
    process_label_path = os.path.join(path, proctype, 'labels')

    # Listing the directory and comparing lower-cased extensions is portable
    # (no hard-coded "\\" separator) and cannot return the same file twice on
    # case-insensitive file systems.
    imgfiles = _list_images(process_image_path)

    # NOTE(review): output is created under ROOT_DIR rather than ``path``;
    # kept as-is to preserve the existing output location -- confirm intent.
    createfile = os.path.join(ROOT_DIR, 'createjson', proctype)
    os.makedirs(createfile, exist_ok=True)

    for image_path in tqdm(imgfiles):
        frame = cv2.imread(image_path)
        if frame is None:
            # cv2.imread signals failure by returning None; skip the file
            # instead of crashing inside cvtColor.
            print("Skipping unreadable image {}".format(image_path))
            continue
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        height, width = frame.shape[:2]
        size = (width, height)

        imgfilename = os.path.basename(image_path)
        imgname = os.path.splitext(imgfilename)[0]
        jsonpath = os.path.join(createfile, imgname + '.json')
        txtpath = os.path.join(process_label_path, imgname + '.txt')

        # An image with no label file, or with an empty one, is treated as
        # having no objects and gets a JSON file with an empty shape list.
        info = []
        if os.path.isfile(txtpath):
            label_and_point = read_txt(txtpath)
            if label_and_point.ndim == 2 and label_and_point.shape[0] > 0:
                label_and_point[:, 1:] = reconvert_np(
                    size, label_and_point[:, 1:])
                info = process_point(label_and_point, cls)

        # Store the file name with its extension so the JSON refers to the
        # image copied next to it.
        create_json(frame, imgfilename, jsonpath, info)
        shutil.copy(image_path, createfile)
def yolotolabelme(path=ROOT_DIR):
    """Convert every split found under *path* to labelme JSON files.

    The splits ``train``, ``valid`` and ``test`` are processed, in that
    order, when a directory entry with that name exists in ``path``. Class
    names are taken from the ``names`` entry of the dataset YAML.

    Args:
        path: Dataset root directory containing the split folders and the
            dataset ``*.yaml`` file.
    """
    entries = os.listdir(path)
    pathtype = [split for split in ('train', 'valid', 'test') if split in entries]
    # Forward ``path`` explicitly: previously the helpers fell back to their
    # own default, so a non-default ``path`` was only used to detect splits.
    cls = load_dataset_info(path)['names']
    for file_type in pathtype:
        print("Processing image type {} \n".format(file_type))
        txt2json(file_type, cls, path)
# Script entry point: convert the dataset located in the current directory.
if __name__ == "__main__":
    yolotolabelme()
Save the code above as yolotolabelme.py in the root directory of the dataset.
Before running the program, install the libraries imported by the code above, then run it.
After it finishes, a createjson folder is generated.
The converted data is written under the createjson folder, in a subfolder named after each split (train or valid); you can then open it with labelme.
Because my test set is empty, its converted folder is empty as well. Open the train folder with labelme to see the corresponding annotations.
边栏推荐
- 网络安全-钓鱼
- Vant implements a simple login registration module and a personal user center
- Analysis, use and extension of open source API gateway apisex
- DDL basic operation
- Niuniu's ball guessing game (dynamic planning + prefix influence)
- Problems encountered in small program development of dark horse shopping mall
- 网络安全-DNS欺骗与钓鱼网站
- Huakaiyun (Zhiyin) | virtual host: what is a virtual host
- Internal connection query and external connection
- [error record] navigator operation requested with a context that does not include a naviga
猜你喜欢
【Camera专题】手把手撸一份驱动 到 点亮Camera
[understanding of opportunity -36]: Guiguzi - flying clamp chapter - prevention against killing and bait
ByteDance data Lake integration practice based on Hudi
Custom components, using NPM packages, global data sharing, subcontracting
Main features of transport layer TCP and TCP connection
Scheme and practice of cold and hot separation of massive data
Redis:Redis的简单使用
Some functions of applet development
【數據挖掘】任務6:DBSCAN聚類
[camera topic] how to save OTP data in user-defined nodes
随机推荐
[data mining] task 3: decision tree classification
File class (check)
Network security NAT network address translation
网络安全-钓鱼
网络安全-木马
[fluent] hero animation (hero animation use process | create hero animation core components | create source page | create destination page | page Jump)
Telecom Customer Churn Prediction challenge
STM32 - switch of relay control lamp
疫情當頭,作為Leader如何進行團隊的管理?| 社區征文
【數據挖掘】任務6:DBSCAN聚類
[shutter] animation animation (animatedwidget animation use process | create animation controller | create animation | create animatedwidget animation component | animation operation)
Basic operation of view
Take you ten days to easily complete the go micro service series (I)
[keil5 debugging] debug is stuck in reset_ Handler solution
Hard core observation 547 large neural network may be beginning to become aware?
网络安全-最简单的病毒
网络安全-破解系统密码
Network security - the simplest virus
[camera topic] turn a drive to light up the camera
2022 financial product revenue ranking