[Deep Learning][Original] Making YOLOv6-0.1.0 support YOLOv5's txt dataset-loading mode
2022-07-05 15:49:00 【FL1623863129】
Meituan has released the YOLOv6 framework, and it looks quite promising so far, but since it is still new, a lot remains unpolished. Today, while training on my own dataset, I found that the framework only accepts datasets arranged in this layout:
custom_dataset
├── images
│   ├── train
│   │   ├── train0.jpg
│   │   └── train1.jpg
│   ├── val
│   │   ├── val0.jpg
│   │   └── val1.jpg
│   └── test
│       ├── test0.jpg
│       └── test1.jpg
└── labels
    ├── train
    │   ├── train0.txt
    │   └── train1.txt
    ├── val
    │   ├── val0.txt
    │   └── val1.txt
    └── test
        ├── test0.txt
        └── test1.txt
I prefer YOLOv5's layout instead (YOLOv5 of course also supports the layout above):
images/
    1.jpg
    2.jpg
    ......
labels/
    1.txt
    2.txt
    ......
Then put the dataset splits into txt files:
train.txt:
    /home/fut/data/images/1.jpg
    /home/fut/data/images/2.jpg
    ....
val.txt:
    /home/fut/data/images/6.jpg
    /home/fut/data/images/7.jpg
    ....
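If you don't have such split files yet, something like the following minimal sketch can generate them. This is not from the original article: the image folder path is taken from the example above, while the 90/10 ratio, the output directory, and the .jpg-only glob are assumptions to adjust as needed.

# Hypothetical helper: write train.txt / val.txt from a flat images/ folder.
import glob
import os
import random

image_dir = "/home/fut/data/images"   # flat folder containing all images
out_dir = "myproj/config"             # where train.txt / val.txt will be written
os.makedirs(out_dir, exist_ok=True)

img_paths = sorted(glob.glob(os.path.join(image_dir, "*.jpg")))
random.seed(0)
random.shuffle(img_paths)

split = int(len(img_paths) * 0.9)     # 90% train, 10% val (assumed ratio)
for name, subset in (("train.txt", img_paths[:split]), ("val.txt", img_paths[split:])):
    with open(os.path.join(out_dir, name), "w") as f:
        f.write("\n".join(subset) + "\n")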
In the data config file, set it up like this:
train: myproj/config/train.txt
val: myproj/config/val.txt
nc: 2
# whether it is coco dataset, only coco dataset should be set to True.
is_coco: False
# class names
names: ['dog','cat']
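Before training, it can also be worth checking that every image listed in the split files exists and has a matching label file. A rough, optional sketch (the file names come from the config above; the "images" to "labels" path convention is the one the modified loader below relies on):

# Optional sanity check for the txt split files; not part of the original article.
import os

for split_file in ("myproj/config/train.txt", "myproj/config/val.txt"):
    with open(split_file) as f:
        img_paths = [line.strip() for line in f if line.strip()]
    for img_path in img_paths:
        # labels are expected next to the images, with "images" replaced by "labels"
        label_path = os.path.splitext(img_path.replace("images", "labels"))[0] + ".txt"
        if not os.path.isfile(img_path):
            print(f"missing image: {img_path}")
        if not os.path.isfile(label_path):
            print(f"missing label: {label_path}")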
This way there is no need to split the data into four folders every time. Without further ado, let's modify the code: open YOLOv6-0.1.0/yolov6/data/datasets.py and change the loading logic in the function def get_imgs_labels(self, img_dir):. The complete modified function is below:
def get_imgs_labels(self, img_dir):
    NUM_THREADS = min(8, os.cpu_count())
    if os.path.isdir(img_dir):
        # original yolov6 mode: img_dir is a directory such as custom_dataset/images/train
        valid_img_record = osp.join(
            osp.dirname(img_dir), "." + osp.basename(img_dir) + ".json"
        )
        img_paths = glob.glob(osp.join(img_dir, "*"), recursive=True)
        img_paths = sorted(
            p for p in img_paths if p.split(".")[-1].lower() in IMG_FORMATS
        )
        assert img_paths, f"No images found in {img_dir}."
    else:
        # modified: yolov5-style mode, img_dir is a txt file listing one image path per line
        with open(img_dir, "r") as f:
            img_paths = f.read().rstrip("\n").split("\n")
        # the cache file is written next to the txt file, e.g. myproj/config/.train.json
        valid_img_record = (
            os.path.dirname(img_dir) + os.sep + "." + osp.basename(img_dir)[:-4] + ".json"
        )
    img_hash = self.get_hash(img_paths)
    if osp.exists(valid_img_record):
        with open(valid_img_record, "r") as f:
            cache_info = json.load(f)
            if "image_hash" in cache_info and cache_info["image_hash"] == img_hash:
                img_info = cache_info["information"]
            else:
                self.check_images = True
    else:
        self.check_images = True

    # check images
    if self.check_images and self.main_process:
        img_info = {}
        nc, msgs = 0, []  # number of corrupt images, messages
        LOGGER.info(
            f"{self.task}: Checking formats of images with {NUM_THREADS} process(es): "
        )
        with Pool(NUM_THREADS) as pool:
            pbar = tqdm(
                pool.imap(TrainValDataset.check_image, img_paths),
                total=len(img_paths),
            )
            for img_path, shape_per_img, nc_per_img, msg in pbar:
                if nc_per_img == 0:  # not corrupted
                    img_info[img_path] = {"shape": shape_per_img}
                nc += nc_per_img
                if msg:
                    msgs.append(msg)
                pbar.desc = f"{nc} image(s) corrupted"
        pbar.close()
        if msgs:
            LOGGER.info("\n".join(msgs))

        cache_info = {"information": img_info, "image_hash": img_hash}
        # save valid image paths.
        with open(valid_img_record, "w") as f:
            json.dump(cache_info, f)

    # check and load anns
    # replaced the original directory-based label_dir lookup:
    # label_dir = osp.join(
    #     osp.dirname(osp.dirname(img_dir)), "coco", osp.basename(img_dir)
    # )
    # assert osp.exists(label_dir), f"{label_dir} is an invalid directory path!"
    img_paths = list(img_info.keys())
    # modified: derive the label dir from the image paths ("images" -> "labels")
    label_dir = os.path.dirname(img_paths[0]).replace("images", "labels")
    label_paths = sorted(
        osp.join(label_dir, osp.splitext(osp.basename(p))[0] + ".txt")
        for p in img_paths
    )
    label_hash = self.get_hash(label_paths)
    if "label_hash" not in cache_info or cache_info["label_hash"] != label_hash:
        self.check_labels = True

    if self.check_labels:
        cache_info["label_hash"] = label_hash
        nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # missing, found, empty, corrupt, messages
        LOGGER.info(
            f"{self.task}: Checking formats of labels with {NUM_THREADS} process(es): "
        )
        with Pool(NUM_THREADS) as pool:
            pbar = pool.imap(
                TrainValDataset.check_label_files, zip(img_paths, label_paths)
            )
            pbar = tqdm(pbar, total=len(label_paths)) if self.main_process else pbar
            for (
                img_path,
                labels_per_file,
                nc_per_file,
                nm_per_file,
                nf_per_file,
                ne_per_file,
                msg,
            ) in pbar:
                if nc_per_file == 0:
                    img_info[img_path]["labels"] = labels_per_file
                else:
                    img_info.pop(img_path)
                nc += nc_per_file
                nm += nm_per_file
                nf += nf_per_file
                ne += ne_per_file
                if msg:
                    msgs.append(msg)
                if self.main_process:
                    pbar.desc = f"{nf} label(s) found, {nm} label(s) missing, {ne} label(s) empty, {nc} invalid label files"
        if self.main_process:
            pbar.close()
            with open(valid_img_record, "w") as f:
                json.dump(cache_info, f)
        if msgs:
            LOGGER.info("\n".join(msgs))
        if nf == 0:
            LOGGER.warning(
                f"WARNING: No labels found in {osp.dirname(self.img_paths[0])}. "
            )

    if self.task.lower() == "val":
        if self.data_dict.get("is_coco", False):  # use original json file when evaluating on coco dataset.
            assert osp.exists(self.data_dict["anno_path"]), "Eval on coco dataset must provide valid path of the annotation file in config file: data/coco.yaml"
        else:
            assert (
                self.class_names
            ), "Class names is required when converting labels to coco format for evaluating."
            save_dir = osp.join(osp.dirname(osp.dirname(img_dir)), "annotations")
            if not osp.exists(save_dir):
                os.mkdir(save_dir)
            save_path = osp.join(
                save_dir, "instances_" + osp.basename(img_dir) + ".json"
            )
            TrainValDataset.generate_coco_format_labels(
                img_info, self.class_names, save_path
            )

    img_paths, labels = list(
        zip(
            *[
                (
                    img_path,
                    np.array(info["labels"], dtype=np.float32)
                    if info["labels"]
                    else np.zeros((0, 5), dtype=np.float32),
                )
                for img_path, info in img_info.items()
            ]
        )
    )
    self.img_info = img_info
    LOGGER.info(
        f"{self.task}: Final numbers of valid images: {len(img_paths)}/ labels: {len(labels)}. "
    )
    return img_paths, labels
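With this change, get_imgs_labels accepts either a directory (the original four-folder layout) or a txt split file. Two things follow from the code above: the cache JSON is written next to the split file (e.g. myproj/config/.train.json), and because label_dir is derived from the first image path only, all images listed in one txt file should live under the same images/ directory so their labels resolve correctly. For reference, the image-to-label path convention boils down to this small standalone sketch (illustration only, not code from YOLOv6 itself):

# Illustration of the image-to-label path mapping used by the modified function.
import os.path as osp

def label_path_for(img_path: str) -> str:
    label_dir = osp.dirname(img_path).replace("images", "labels")
    return osp.join(label_dir, osp.splitext(osp.basename(img_path))[0] + ".txt")

print(label_path_for("/home/fut/data/images/1.jpg"))
# -> /home/fut/data/labels/1.txt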