当前位置:网站首页>45、实例分割的labelme数据集转coco数据集以及coco数据集转labelme数据集
45、实例分割的labelme数据集转coco数据集以及coco数据集转labelme数据集
2022-07-26 21:10:00 【sxj731533730】
基本思想:需要把自己标注的数据集和coco的单类别数据集混合训练,因此写了脚本,从coco train2017 实例分割数据集中提取部分数据。
第一步:如果你用labelme标注了数据集,需要转成coco数据集进行实例分割训练,可以用下面脚本进行转换
labelme数据集转coco数据集,代码来自 labelme/examples/instance_segmentation(zsffuture/labelme,GitHub):
import argparse
import collections
import datetime
import glob
import json
import os
import os.path as osp
import sys
import uuid
import imgviz
import numpy as np
import labelme
try:
import pycocotools.mask
except ImportError:
print("Please install pycocotools:\n\n pip install pycocotools\n")
sys.exit(1)
def main():
    """Convert a directory of labelme JSON files into a COCO instance dataset.

    Command-line options:
        --input_dir   directory containing the labelme ``*.json`` files
        --output_dir  directory that receives ``annotations.json``,
                      ``JPEGImages/`` and, unless ``--noviz`` is given,
                      ``Visualization/``
        --labels      text file with one class name per line; the first line
                      must be ``__ignore__`` and the following lines get the
                      category ids 0, 1, 2, ...
        --noviz       do not write the visualization images

    Shapes whose label is not listed in the labels file are skipped.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--input_dir",
        default="/home/ubuntu/Downloads/dest",
        help="input annotated directory",
    )
    parser.add_argument(
        "--output_dir",
        default="/home/ubuntu/Downloads/coco",
        help="output dataset directory",
    )
    parser.add_argument(
        "--labels",
        default="/home/ubuntu/Downloads/labels.txt",
        help="labels file",
    )
    parser.add_argument(
        "--noviz", help="no visualization", action="store_true"
    )
    args = parser.parse_args()

    # An existing output directory is reused; the message is only printed
    # when the directory really is there.
    if os.path.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"), exist_ok=True)
    if not args.noviz:
        # The visualization images are written below, so the folder must exist.
        os.makedirs(osp.join(args.output_dir, "Visualization"), exist_ok=True)
    print("Creating dataset:", args.output_dir)

    now = datetime.datetime.now()

    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[dict(url=None, id=0, name=None,)],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )

    class_name_to_id = {}
    with open(args.labels) as labels_file:
        for i, line in enumerate(labels_file):
            class_id = i - 1  # starts with -1
            class_name = line.strip()
            if class_id == -1:
                assert class_name == "__ignore__", (
                    "first line of the labels file must be __ignore__"
                )
                continue
            if not class_name:
                # A blank (e.g. trailing) line must not become a category.
                continue
            class_name_to_id[class_name] = class_id
            data["categories"].append(
                dict(supercategory=None, id=class_id, name=class_name,)
            )

    out_ann_file = osp.join(args.output_dir, "annotations.json")
    # Sorted so that image ids are the same on every run.
    label_files = sorted(glob.glob(osp.join(args.input_dir, "*.json")))
    for image_id, filename in enumerate(label_files):
        print("Generating dataset from:", filename)

        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)
        data["images"].append(
            dict(
                license=0,
                url=None,
                file_name=osp.relpath(out_img_file, osp.dirname(out_ann_file)),
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            )
        )

        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")
            mask = labelme.utils.shape_to_mask(
                img.shape[:2], points, shape_type
            )

            # Shapes without a group id each form their own instance.
            if group_id is None:
                group_id = uuid.uuid1()

            instance = (label, group_id)

            # Shapes sharing (label, group_id) are merged into one mask.
            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask

            if shape_type == "rectangle":
                # Expand the two corner points into a 4-corner polygon.
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            else:
                points = np.asarray(points).flatten().tolist()

            segmentations[instance].append(points)
        segmentations = dict(segmentations)

        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in class_name_to_id:
                continue
            cls_id = class_name_to_id[cls_name]

            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            data["annotations"].append(
                dict(
                    id=len(data["annotations"]),
                    image_id=image_id,
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                )
            )

        if not args.noviz:
            known = [
                (class_name_to_id[cnm], cnm, msk)
                for (cnm, gid), msk in masks.items()
                if cnm in class_name_to_id
            ]
            viz = img
            if known:
                # zip(*[]) cannot be unpacked into three names, so only draw
                # when at least one instance of a known class is present.
                labels, captions, viz_masks = zip(*known)
                viz = imgviz.instances2rgb(
                    image=img,
                    labels=labels,
                    masks=viz_masks,
                    captions=captions,
                    font_size=15,
                    line_width=2,
                )
            out_viz_file = osp.join(
                args.output_dir, "Visualization", base + ".jpg"
            )
            imgviz.io.imsave(out_viz_file, viz)

    with open(out_ann_file, "w") as f:
        json.dump(data, f)
if __name__ == "__main__":
    main()

标签定义 labels.txt:
__ignore__
__background__
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

利用上面的脚本就可以转成对应的coco数据集。
第二步:如果需要把上面生成的(或者别人生成的)coco数据集转成labelme数据集,可以用下面的脚本(稍加修改也可以用于生成目标检测的数据集)
# -*- coding: utf-8 -*-
import glob
import os
import cv2
import json
import io
# Category names indexed by (COCO category id - 1).  The empty strings are
# placeholders for the ids the official COCO annotations do not use
# (12, 26, 29, 30, 45, 66, 68, 69, 71 and 83), so that id 84 maps to "book".
coco = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "", "backpack", "umbrella", "", "", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "", "dining table", "", "", "toilet", "", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"]
# Lookup table: zero-based index -> category name.
label = dict(enumerate(coco))
# Directory that receives the generated labelme JSON files and the images.
labelme_path = '/home/ubuntu/Downloads/test/total1'
# Directory holding the COCO annotation file; the first *.json match is used.
coco_json_path = '/home/ubuntu/Downloads/test/human_save_2'
# Directory holding the source JPEG images.
jpg_path = '/home/ubuntu/Downloads/test/human_save_2'
# Only annotations of this category are exported.
target_label = 'person'

coco_json = glob.glob(os.path.join(coco_json_path, "*.json"))[0]
with io.open(coco_json, 'r', encoding='utf-8') as file_json:
    m_data = json.load(file_json)
#m_type=m_data['type']

# Group the annotations by image once, so that each image does not have to
# rescan the complete annotation list.
anns_by_image = {}
for annit in m_data['annotations']:
    anns_by_image.setdefault(annit['image_id'], []).append(annit)

os.makedirs(labelme_path, exist_ok=True)

for item in m_data['images']:
    m_filename = os.path.basename(item['file_name'])
    m_name = os.path.splitext(m_filename)[0]

    data = {}
    data['imagePath'] = m_filename
    data['flags'] = {}
    data['imageWidth'] = item['width']
    data['imageHeight'] = item['height']
    data['imageData'] = None
    data['version'] = "5.0.1"
    data["shapes"] = []

    for annit in anns_by_image.get(item['id'], []):
        # label is indexed by (category id - 1); unknown ids give None.
        m_label = label.get(annit['category_id'] - 1)
        if m_label != target_label:
            continue
        segmentation = annit['segmentation']
        if not isinstance(segmentation, list):
            # A dict here is an RLE mask, which has no polygon to export.
            continue
        for segitem in segmentation:
            # Flat [x0, y0, x1, y1, ...] list -> [[x0, y0], [x1, y1], ...].
            points = [[x, y] for x, y in zip(segitem[0::2], segitem[1::2])]
            if not points:
                continue
            itemData = {'points': points}
            itemData["flags"] = {}  # labelme's shape key is "flags"
            itemData["group_id"] = None
            itemData["shape_type"] = "polygon"
            itemData["label"] = m_label
            data["shapes"].append(itemData)

    # Export the image only when at least one polygon was collected.
    if not data["shapes"]:
        continue

    jsonName = ".".join([m_name, "json"])
    jpgName = ".".join([m_name, "jpg"])
    # The image is decoded only for images that are actually exported.
    m_image = cv2.imread(os.path.join(jpg_path, jpgName))
    if m_image is None:
        print("image not found, skipped:", os.path.join(jpg_path, jpgName))
        continue
    print(labelme_path, jsonName)
    jsonPath = os.path.join(labelme_path, jsonName)
    jpgPath = os.path.join(labelme_path, jpgName)
    with open(jsonPath, "w") as f:
        json.dump(data, f)
    cv2.imwrite(jpgPath, m_image)
print("加载入文件完成...")

第三步:如果要将官方的coco数据集转成labelme,做法稍有不同:官方coco的类别id实际排到90(中间有空缺),所以把coco数据集转成labelme数据集时,需要修改标签表,在空缺的位置插入空标签。
需要插入空标签的位置(从1开始计数)分别是:12 26 29 30 45 66 68 69 71 83
# -*- coding: utf-8 -*-
import glob
import os
import cv2
import json
import io
# Category names indexed by (COCO category id - 1).  The empty strings are
# placeholders for the ids the official COCO annotations do not use
# (12, 26, 29, 30, 45, 66, 68, 69, 71 and 83), so that id 84 maps to "book".
coco = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "", "backpack", "umbrella", "", "", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "", "dining table", "", "", "toilet", "", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"]
# Lookup table: zero-based index -> category name.
label = dict(enumerate(coco))
# Directory that receives the generated labelme JSON files and the images.
labelme_path = "/home/ubuntu/Downloads/coco_all"
# Directory holding the COCO annotation file; the first *.json match is used.
coco_json_path = '/home/ubuntu/Downloads/coco2017/train_ann'
# Directory holding the source JPEG images.
jpg_path = "/home/ubuntu/Downloads/coco2017/train2017"

coco_json = glob.glob(os.path.join(coco_json_path, "*.json"))[0]
with io.open(coco_json, 'r', encoding='utf-8') as file_json:
    m_data = json.load(file_json)
#m_type=m_data['type']

# Group the annotations by image once, so that each image does not have to
# rescan the complete annotation list.
anns_by_image = {}
for annit in m_data['annotations']:
    anns_by_image.setdefault(annit['image_id'], []).append(annit)

os.makedirs(labelme_path, exist_ok=True)

for item in m_data['images']:
    m_filename = os.path.basename(item['file_name'])
    m_name = os.path.splitext(m_filename)[0]

    data = {}
    data['imagePath'] = m_filename
    data['flags'] = {}
    data['imageWidth'] = item['width']
    data['imageHeight'] = item['height']
    data['imageData'] = None
    data['version'] = "5.0.1"
    data["shapes"] = []

    for annit in anns_by_image.get(item['id'], []):
        # label is indexed by (category id - 1); ids outside the table are
        # skipped instead of raising KeyError.
        m_label = label.get(annit['category_id'] - 1)
        if m_label is None:
            continue
        segmentation = annit['segmentation']
        if not isinstance(segmentation, list):
            # A dict here is an RLE mask, which has no polygon to export.
            continue
        for segitem in segmentation:
            # Flat [x0, y0, x1, y1, ...] list -> [[x0, y0], [x1, y1], ...].
            points = [[x, y] for x, y in zip(segitem[0::2], segitem[1::2])]
            if not points:
                continue
            itemData = {'points': points}
            itemData["flags"] = {}  # labelme's shape key is "flags"
            itemData["group_id"] = None
            itemData["shape_type"] = "polygon"
            itemData["label"] = m_label
            data["shapes"].append(itemData)

    # Every image is exported, including those without any shape.
    jsonName = ".".join([m_name, "json"])
    jpgName = ".".join([m_name, "jpg"])
    m_image = cv2.imread(os.path.join(jpg_path, jpgName))
    if m_image is None:
        print("image not found, skipped:", os.path.join(jpg_path, jpgName))
        continue
    print(labelme_path, jsonName)
    jsonPath = os.path.join(labelme_path, jsonName)
    jpgPath = os.path.join(labelme_path, jpgName)
    with open(jsonPath, "w") as f:
        json.dump(data, f)
    cv2.imwrite(jpgPath, m_image)
print("加载入文件完成...")

然后需要哪个类别的实例分割数据集,自己写个if条件筛选即可。
比如我要提取cup的数据集:
# -*- coding: utf-8 -*-
import glob
import os
import cv2
import json
import io
# Category names indexed by (COCO category id - 1).  The empty strings are
# placeholders for the ids the official COCO annotations do not use
# (12, 26, 29, 30, 45, 66, 68, 69, 71 and 83), so that id 84 maps to "book".
coco = ["person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "", "backpack", "umbrella", "", "", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "", "dining table", "", "", "toilet", "", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush"]
# Lookup table: zero-based index -> category name.
label = dict(enumerate(coco))
# Directory that receives the generated labelme JSON files and the images.
labelme_path = "/home/ubuntu/Downloads/coco_all"
# Directory holding the COCO annotation file; the first *.json match is used.
coco_json_path = '/home/ubuntu/Downloads/coco2017/train_ann'
# Directory holding the source JPEG images.
jpg_path = "/home/ubuntu/Downloads/coco2017/train2017"
# Only annotations of this category are exported.
target_label = 'cup'

coco_json = glob.glob(os.path.join(coco_json_path, "*.json"))[0]
with io.open(coco_json, 'r', encoding='utf-8') as file_json:
    m_data = json.load(file_json)
#m_type=m_data['type']

# Group the annotations by image once, so that each image does not have to
# rescan the complete annotation list.
anns_by_image = {}
for annit in m_data['annotations']:
    anns_by_image.setdefault(annit['image_id'], []).append(annit)

os.makedirs(labelme_path, exist_ok=True)

for item in m_data['images']:
    m_filename = os.path.basename(item['file_name'])
    m_name = os.path.splitext(m_filename)[0]

    data = {}
    data['imagePath'] = m_filename
    data['flags'] = {}
    data['imageWidth'] = item['width']
    data['imageHeight'] = item['height']
    data['imageData'] = None
    data['version'] = "5.0.1"
    data["shapes"] = []

    for annit in anns_by_image.get(item['id'], []):
        # label is indexed by (category id - 1); unknown ids give None.
        m_label = label.get(annit['category_id'] - 1)
        if m_label != target_label:
            continue
        segmentation = annit['segmentation']
        if not isinstance(segmentation, list):
            # A dict here is an RLE mask, which has no polygon to export.
            continue
        for segitem in segmentation:
            # Flat [x0, y0, x1, y1, ...] list -> [[x0, y0], [x1, y1], ...].
            points = [[x, y] for x, y in zip(segitem[0::2], segitem[1::2])]
            if not points:
                # Never emit a shape without points.
                continue
            itemData = {'points': points}
            itemData["flags"] = {}  # labelme's shape key is "flags"
            itemData["group_id"] = None
            itemData["shape_type"] = "polygon"
            itemData["label"] = m_label
            data["shapes"].append(itemData)

    # Export the image only when at least one polygon was collected.
    if not data["shapes"]:
        continue

    jsonName = ".".join([m_name, "json"])
    jpgName = ".".join([m_name, "jpg"])
    # The image is decoded only for images that are actually exported.
    m_image = cv2.imread(os.path.join(jpg_path, jpgName))
    if m_image is None:
        print("image not found, skipped:", os.path.join(jpg_path, jpgName))
        continue
    print(labelme_path, jsonName)
    jsonPath = os.path.join(labelme_path, jsonName)
    jpgPath = os.path.join(labelme_path, jpgName)
    with open(jsonPath, "w") as f:
        json.dump(data, f)
    cv2.imwrite(jpgPath, m_image)
print("加载入文件完成...")

然后再和自己的数据集合并一下,转成coco数据集就OK。
边栏推荐
- 拼多多获得搜索词推荐 API
- 吃透负载均衡
- 《暑假每日一题》Week 7:7.18 - 7.24
- 同花顺上面开户安全吗,开户怎么选券商
- Happens-Before原则深入解读
- Just one dependency to give swagger a new skin, which is simple and cool
- 伟创力回应“扣押华为物料”事件:深感遗憾,期待继续合作!
- Pytorch squeeze() unsqueeze() 用法
- Highlight the secondary and tertiary columns under the primary column of pbootcms
- 基于CAShapeLayer和贝塞尔曲线的圆形进度条动画
猜你喜欢

六、微信小程序发布流程

flask 源码启动阶段

If you do not add waitkey() function after imshow() function, it will not be displayed

安全浏览器“隐身”模式可以查看历史记录吗?

仅需一个依赖给Swagger换上新皮肤,既简单又炫酷
![[mysql]substr usage - query the value of specific digits of a field in the table](/img/d5/68658ff15f204dc97abfe7c9e6b354.png)
[mysql]substr usage - query the value of specific digits of a field in the table

Isilon 的OneFs常见操作命令(一)

Cmake compiling obs-studio-27.2.0

逻辑漏洞----任意账号注册

What to do if the browser home page is tampered with, and how to recover if the home page is tampered with
随机推荐
Number() VS parseInt()
Thoroughly understand the principle and implementation of service discovery
Isilon's onefs common operation commands (I)
【Flutter -- GetX】弹框 - Dialog、Snackbar、BottomSheet
A unified label space for entity relationship extraction
contenteditable 元素的placeholder
:active vs :focus
方法重载与方法重写
5、 Applet error: message:error: system error, error code: 80058, desc of scope userLocation is empty
测试员:“我有五年测试经验”HR: “不,你只是把一年的工作经验用了五年”
七、微信小程序运行报错:Error: AppID 不合法,invalid appid
小米1亿像素摄像头揭秘:1/1.3英寸COMS传感器、分辨率12032×9024
Preview image before uploading
虾皮shopee根据ID取商品详情 API
华为发布2025十大趋势:5G、机器人、AI等上榜
Flag decodes token, mounts token, decorator, and seven cattle cloud upload
Six instructions of Memcache based caching mechanism
六、微信小程序发布流程
伟创力回应“扣押华为物料”事件:深感遗憾,期待继续合作!
基于memcache的缓存机制的6个指令