45. Instance segmentation: converting a labelme dataset to a COCO dataset, and a COCO dataset back to labelme
2022-07-26 21:56:00 【sxj731533730】
The basic idea: I need to mix some data I labelled myself with a single-category subset of COCO for hybrid training, so I wrote scripts to extract part of the COCO train2017 instance-segmentation data.
Step 1: If the dataset was annotated with labelme, it has to be converted into a COCO-format dataset before it can be used for instance-segmentation training. The following script does the conversion.
The labelme-to-COCO conversion code comes from labelme/examples/instance_segmentation at master · zsffuture/labelme · GitHub.
import argparse
import collections
import datetime
import glob
import json
import os
import os.path as osp
import sys
import uuid
import imgviz
import numpy as np
import labelme
try:
import pycocotools.mask
except ImportError:
print("Please install pycocotools:\n\n pip install pycocotools\n")
sys.exit(1)
def main():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument("--input_dir",default="/home/ubuntu/Downloads/dest", help="input annotated directory")
parser.add_argument("--output_dir", default="/home/ubuntu/Downloads/coco",help="output dataset directory")
parser.add_argument("--labels",default="/home/ubuntu/Downloads/labels.txt", help="labels file")
parser.add_argument(
"--noviz", help="no visualization", action="store_true"
)
args = parser.parse_args()
    # Create the output directories; Visualization is needed unless --noviz is given
    os.makedirs(osp.join(args.output_dir, "JPEGImages"), exist_ok=True)
    if not args.noviz:
        os.makedirs(osp.join(args.output_dir, "Visualization"), exist_ok=True)
    print("Creating dataset:", args.output_dir)
now = datetime.datetime.now()
data = dict(
info=dict(
description=None,
url=None,
version=None,
year=now.year,
contributor=None,
date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
),
licenses=[dict(url=None, id=0, name=None,)],
images=[
# license, url, file_name, height, width, date_captured, id
],
type="instances",
annotations=[
# segmentation, area, iscrowd, image_id, bbox, category_id, id
],
categories=[
# supercategory, id, name
],
)
class_name_to_id = {}
for i, line in enumerate(open(args.labels).readlines()):
class_id = i - 1 # starts with -1
class_name = line.strip()
if class_id == -1:
assert class_name == "__ignore__"
continue
class_name_to_id[class_name] = class_id
data["categories"].append(
dict(supercategory=None, id=class_id, name=class_name,)
)
out_ann_file = osp.join(args.output_dir, "annotations.json")
label_files = glob.glob(osp.join(args.input_dir, "*.json"))
for image_id, filename in enumerate(label_files):
print("Generating dataset from:", filename)
label_file = labelme.LabelFile(filename=filename)
print(label_file)
base = osp.splitext(osp.basename(filename))[0]
out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")
img = labelme.utils.img_data_to_arr(label_file.imageData)
imgviz.io.imsave(out_img_file, img)
data["images"].append(
dict(
license=0,
url=None,
file_name=osp.relpath(out_img_file, osp.dirname(out_ann_file)),
height=img.shape[0],
width=img.shape[1],
date_captured=None,
id=image_id,
)
)
masks = {} # for area
segmentations = collections.defaultdict(list) # for segmentation
for shape in label_file.shapes:
points = shape["points"]
label = shape["label"]
group_id = shape.get("group_id")
shape_type = shape.get("shape_type", "polygon")
mask = labelme.utils.shape_to_mask(
img.shape[:2], points, shape_type
)
if group_id is None:
group_id = uuid.uuid1()
instance = (label, group_id)
if instance in masks:
masks[instance] = masks[instance] | mask
else:
masks[instance] = mask
if shape_type == "rectangle":
(x1, y1), (x2, y2) = points
x1, x2 = sorted([x1, x2])
y1, y2 = sorted([y1, y2])
points = [x1, y1, x2, y1, x2, y2, x1, y2]
else:
points = np.asarray(points).flatten().tolist()
segmentations[instance].append(points)
segmentations = dict(segmentations)
for instance, mask in masks.items():
cls_name, group_id = instance
if cls_name not in class_name_to_id:
continue
cls_id = class_name_to_id[cls_name]
mask = np.asfortranarray(mask.astype(np.uint8))
mask = pycocotools.mask.encode(mask)
area = float(pycocotools.mask.area(mask))
bbox = pycocotools.mask.toBbox(mask).flatten().tolist()
data["annotations"].append(
dict(
id=len(data["annotations"]),
image_id=image_id,
category_id=cls_id,
segmentation=segmentations[instance],
area=area,
bbox=bbox,
iscrowd=0,
)
)
if not args.noviz:
labels, captions, masks = zip(
*[
(class_name_to_id[cnm], cnm, msk)
for (cnm, gid), msk in masks.items()
if cnm in class_name_to_id
]
)
viz = imgviz.instances2rgb(
image=img,
labels=labels,
masks=masks,
captions=captions,
font_size=15,
line_width=2,
)
out_viz_file = osp.join(
args.output_dir, "Visualization", base + ".jpg"
)
imgviz.io.imsave(out_viz_file, viz)
with open(out_ann_file, "w") as f:
json.dump(data, f)
if __name__ == "__main__":
    main()

Label definitions (labels.txt):
__ignore__
__background__
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

With the script above, a labelme-annotated dataset can be converted into the corresponding COCO dataset.
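After running it with the argparse defaults above, the output directory (/home/ubuntu/Downloads/coco) contains JPEGImages/, a Visualization/ folder (unless --noviz is given) and annotations.json. A quick way to sanity-check annotations.json is to load it with pycocotools; this is only a small sketch, assuming the default --output_dir above:

# Sketch: sanity-check the generated COCO annotation file with pycocotools.
# The path below is the script's default --output_dir; adjust it to your own.
from pycocotools.coco import COCO

coco_api = COCO("/home/ubuntu/Downloads/coco/annotations.json")
print("images:", len(coco_api.getImgIds()))
print("annotations:", len(coco_api.getAnnIds()))
print("categories:", [c["name"] for c in coco_api.loadCats(coco_api.getCatIds())])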
Step 2: If you need to convert the COCO dataset generated above (or one produced by someone else) back into a labelme dataset, you can use the following script. It can also be adapted to generate an object-detection dataset; see the sketch right below.
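For that object-detection variant, the main change is to build labelme "rectangle" shapes from the COCO bbox field ([x, y, width, height]) instead of polygons from the segmentation field. A minimal sketch of the per-annotation conversion (the helper name and its arguments are mine, not part of the original script):

# Hypothetical helper: turn one COCO annotation's bbox into a labelme rectangle shape.
# COCO stores bbox as [x, y, width, height]; labelme rectangles use two corner points.
def coco_bbox_to_labelme_shape(annotation, category_names):
    x, y, w, h = annotation["bbox"]
    return {
        "label": category_names[annotation["category_id"] - 1],
        "points": [[x, y], [x + w, y + h]],  # top-left and bottom-right corners
        "group_id": None,
        "shape_type": "rectangle",
        "flags": {},
    }

The COCO-to-labelme segmentation script itself follows.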
# -*- coding: utf-8 -*-
import glob
import os
import cv2
import json
import io
coco = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "", "backpack", "umbrella", "", "", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "", "dining table", "", "", "toilet", "", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"]
label=dict()
for idx,item in enumerate(coco):
label.update({idx:item})
labelme_path='/home/ubuntu/Downloads/test/total1'
coco_json_path = '/home/ubuntu/Downloads/test/human_save_2'
jpg_path='/home/ubuntu/Downloads/test/human_save_2'
coco_json=glob.glob(os.path.join(coco_json_path,"*.json"))[0]
file_json = io.open(coco_json,'r',encoding='utf-8')
m_json_data = file_json.read()
m_data = json.loads(m_json_data)
#m_type=m_data['type']
for item in m_data['images']:
flag=False
m_images_file_name = item['file_name']
(filename_path, m_filename) = os.path.split(m_images_file_name)
(m_name, extension) = os.path.splitext(m_filename)
m_image=cv2.imread(os.path.join(jpg_path,m_name+".jpg"))
m_images_height = item['height']
m_images_width = item['width']
m_images_id = item['id']
data = {}
data['imagePath'] = m_filename
data['flags'] = {}
data['imageWidth'] = m_images_width
data['imageHeight'] = m_images_height
data['imageData'] = None
data['version'] = "5.0.1"
data["shapes"] = []
for annit in m_data['annotations']:
m_image_id=annit['image_id']
m_category_id=annit['category_id']
if m_image_id==m_images_id and label[m_category_id-1]=='person':
flag = True
for segitem in annit['segmentation']:
points = []
for idx in range(0,len(segitem),2):
x,y=segitem[idx],segitem[idx+1]
if str(x).isalpha() or str(y).isalpha():
flag = False
break
points.append([x,y])
itemData = {'points': []}
if len(points)==0:
flag = False
break
itemData['points'].extend(points)
                itemData["flags"] = {}
itemData["group_id"] = None
itemData["shape_type"] = "polygon"
itemData["label"] =label[m_category_id-1]
data["shapes"].append(itemData)
if flag:
jsonName = ".".join([m_name, "json"])
jpgName = ".".join([m_name, "jpg"])
print(labelme_path, jsonName)
jsonPath = os.path.join(labelme_path, jsonName)
jpgPath = os.path.join(labelme_path, jpgName)
with open(jsonPath, "w") as f:
json.dump(data, f)
cv2.imwrite(jpgPath,m_image)
print("Finished writing labelme files ...")

Step 3: Converting the official COCO dataset to labelme is a little different, because the official COCO category ids run up to 90 with gaps. When converting the official COCO dataset to a labelme dataset, the label list has to be padded with empty placeholder entries.
Insert empty labels at positions 12, 26, 29, 30, 45, 66, 68, 69, 71 and 83 (counting from 1).
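Rather than maintaining the padded list by hand, the id-to-name mapping can also be read straight from the categories field of the official annotation file, which already encodes these gaps; a small sketch, using the same train_ann directory as the script below:

# Sketch: build {category_id: name} directly from the COCO annotation file,
# so the missing ids (12, 26, 29, ...) are handled automatically.
import glob
import json
import os

coco_json_path = '/home/ubuntu/Downloads/coco2017/train_ann'
coco_json = glob.glob(os.path.join(coco_json_path, "*.json"))[0]
with open(coco_json, 'r', encoding='utf-8') as f:
    m_data = json.load(f)

id_to_name = {cat['id']: cat['name'] for cat in m_data['categories']}
# e.g. id_to_name[1] == 'person', id_to_name[84] == 'book'; id 83 simply never appears.

The full conversion script, which keeps the hand-written padded list, is below.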
# -*- coding: utf-8 -*-
import glob
import os
import cv2
import json
import io
coco = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "", "backpack", "umbrella", "", "", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "", "dining table", "", "", "toilet", "", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"]
label=dict()
for idx,item in enumerate(coco):
label.update({idx:item})
labelme_path="/home/ubuntu/Downloads/coco_all"
coco_json_path = '/home/ubuntu/Downloads/coco2017/train_ann'
jpg_path="/home/ubuntu/Downloads/coco2017/train2017"
coco_json=glob.glob(os.path.join(coco_json_path,"*.json"))[0]
file_json = io.open(coco_json,'r',encoding='utf-8')
m_json_data = file_json.read()
m_data = json.loads(m_json_data)
#m_type=m_data['type']
for item in m_data['images']:
m_images_file_name = item['file_name']
(filename_path, m_filename) = os.path.split(m_images_file_name)
(m_name, extension) = os.path.splitext(m_filename)
m_image=cv2.imread(os.path.join(jpg_path,m_name+".jpg"))
m_images_height = item['height']
m_images_width = item['width']
m_images_id = item['id']
data = {}
data['imagePath'] = m_filename
data['flags'] = {}
data['imageWidth'] = m_images_width
data['imageHeight'] = m_images_height
data['imageData'] = None
data['version'] = "5.0.1"
data["shapes"] = []
for annit in m_data['annotations']:
m_image_id=annit['image_id']
m_category_id=annit['category_id']
if m_image_id==m_images_id :
for segitem in annit['segmentation']:
points = []
for idx in range(0,len(segitem),2):
x,y=segitem[idx],segitem[idx+1]
if str(x).isalpha() or str(y).isalpha():
break
points.append([x,y])
itemData = {'points': []}
if len(points)==0:
break
itemData['points'].extend(points)
                itemData["flags"] = {}
itemData["group_id"] = None
itemData["shape_type"] = "polygon"
itemData["label"] =label[m_category_id-1]
data["shapes"].append(itemData)
jsonName = ".".join([m_name, "json"])
jpgName = ".".join([m_name, "jpg"])
print(labelme_path, jsonName)
jsonPath = os.path.join(labelme_path, jsonName)
jpgPath = os.path.join(labelme_path, jpgName)
with open(jsonPath, "w") as f:
json.dump(data, f)
cv2.imwrite(jpgPath,m_image)
print("Finished writing labelme files ...")

Then, for whichever category you want an instance-segmentation dataset of, just add your own if-condition filter.
For example, to extract a dataset of cup instances:
# -*- coding: utf-8 -*-
import glob
import os
import cv2
import json
import io
coco = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "", "backpack", "umbrella", "", "", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "", "dining table", "", "", "toilet", "", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"]
label=dict()
for idx,item in enumerate(coco):
label.update({idx:item})
labelme_path="/home/ubuntu/Downloads/coco_all"
coco_json_path = '/home/ubuntu/Downloads/coco2017/train_ann'
jpg_path="/home/ubuntu/Downloads/coco2017/train2017"
coco_json=glob.glob(os.path.join(coco_json_path,"*.json"))[0]
file_json = io.open(coco_json,'r',encoding='utf-8')
m_json_data = file_json.read()
m_data = json.loads(m_json_data)
#m_type=m_data['type']
for item in m_data['images']:
flag=False
m_images_file_name = item['file_name']
(filename_path, m_filename) = os.path.split(m_images_file_name)
(m_name, extension) = os.path.splitext(m_filename)
m_image=cv2.imread(os.path.join(jpg_path,m_name+".jpg"))
m_images_height = item['height']
m_images_width = item['width']
m_images_id = item['id']
data = {}
data['imagePath'] = m_filename
data['flags'] = {}
data['imageWidth'] = m_images_width
data['imageHeight'] = m_images_height
data['imageData'] = None
data['version'] = "5.0.1"
data["shapes"] = []
for annit in m_data['annotations']:
m_image_id=annit['image_id']
m_category_id=annit['category_id']
if m_image_id==m_images_id and label[m_category_id-1]=='cup':
flag = True
for segitem in annit['segmentation']:
points = []
for idx in range(0,len(segitem),2):
x,y=segitem[idx],segitem[idx+1]
if str(x).isalpha() or str(y).isalpha():
break
points.append([x,y])
itemData = {'points': []}
itemData['points'].extend(points)
                itemData["flags"] = {}
itemData["group_id"] = None
itemData["shape_type"] = "polygon"
itemData["label"] =label[m_category_id-1]
data["shapes"].append(itemData)
if flag:
jsonName = ".".join([m_name, "json"])
jpgName = ".".join([m_name, "jpg"])
print(labelme_path, jsonName)
jsonPath = os.path.join(labelme_path, jsonName)
jpgPath = os.path.join(labelme_path, jpgName)
with open(jsonPath, "w") as f:
json.dump(data, f)
cv2.imwrite(jpgPath,m_image)
print("Finished writing labelme files ...")

Then combine the extracted data with your own dataset and convert everything back into a COCO dataset, and you are done.
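Before merging, it is worth spot-checking one of the generated labelme JSON files by drawing its polygons back onto the image; a minimal sketch that simply picks the first JSON it finds in the coco_all output directory used above:

# Sketch: draw the polygons of one generated labelme JSON back onto its image
# as a quick visual check of the conversion.
import glob
import json
import os
import cv2
import numpy as np

labelme_path = "/home/ubuntu/Downloads/coco_all"
json_file = glob.glob(os.path.join(labelme_path, "*.json"))[0]
with open(json_file) as f:
    data = json.load(f)

image = cv2.imread(os.path.join(labelme_path, data["imagePath"]))
for shape in data["shapes"]:
    pts = np.array(shape["points"], dtype=np.int32)
    cv2.polylines(image, [pts], isClosed=True, color=(0, 255, 0), thickness=2)
    org = (int(pts[0][0]), int(pts[0][1]))
    cv2.putText(image, shape["label"], org, cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

base = os.path.splitext(os.path.basename(json_file))[0]
cv2.imwrite(os.path.join(labelme_path, base + "_check.jpg"), image)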