当前位置:网站首页>PPOCRLabel格式的数据集操作总结。
PPOCRLabel格式的数据集操作总结。
2022-07-26 22:59:00 【AI浩】
1、生成识别数据
读取PPOCRLabel格式数据集中每个标注目标的四个顶点,然后使用cv2.getPerspectiveTransform和cv2.warpPerspective对目标区域做透视变换并裁剪出小图,生成识别数据集。
import json
import os
import numpy as np
import cv2
def get_rotate_crop_image(img, points):
    """Cut the quadrilateral described by ``points`` out of ``img`` and rectify it.

    ``points[0]``..``points[3]`` are mapped, in that order, onto the corners
    (0, 0), (w, 0), (w, h) and (0, h) of the output, where ``w`` is the longer
    of the 0-1 / 2-3 sides and ``h`` the longer of the 0-3 / 1-2 sides (both
    truncated to int). A crop whose height is at least 1.5 times its width is
    turned with ``np.rot90`` before being returned.

    Args:
        img: Source image passed to ``cv2.warpPerspective``.
        points: Four (x, y) corner points supporting element-wise subtraction.
            NOTE(review): the caller in this file passes an ``np.float32``
            array of shape (4, 2); other inputs are not handled here.

    Returns:
        The rectified (and possibly rotated) crop.

    Raises:
        AssertionError: If ``points`` does not contain exactly four entries.
    """
    assert len(points) == 4, "shape of points must be 4*2"

    # Lengths of the two pairs of opposite sides.
    side_01 = np.linalg.norm(points[0] - points[1])
    side_23 = np.linalg.norm(points[2] - points[3])
    side_03 = np.linalg.norm(points[0] - points[3])
    side_12 = np.linalg.norm(points[1] - points[2])

    img_crop_width = int(max(side_01, side_23))
    img_crop_height = int(max(side_03, side_12))

    # Destination rectangle, listed in the same corner order as ``points``.
    target_corners = [
        [0, 0],
        [img_crop_width, 0],
        [img_crop_width, img_crop_height],
        [0, img_crop_height],
    ]
    pts_std = np.float32(target_corners)

    warp_matrix = cv2.getPerspectiveTransform(points, pts_std)
    dst_img = cv2.warpPerspective(
        img,
        warp_matrix,
        (img_crop_width, img_crop_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC,
    )

    crop_h, crop_w = dst_img.shape[0:2]
    aspect = crop_h * 1.0 / crop_w
    if aspect >= 1.5:
        dst_img = np.rot90(dst_img)
    return dst_img
def write_txt_img(src_path, label_txt, file_dir,
                  image_root='./train_data/icdar2015/text_localization/',
                  save_root='./train_data/rec/'):
    """Crop every annotated text region and record it in a recognition label file.

    Each non-blank line of ``src_path`` is ``<image path>\\t<annotation list>``.
    For every annotation the region given by its ``points`` is cut out with
    ``get_rotate_crop_image``, saved as ``<index>_<image name>`` under
    ``save_root + file_dir``, and a ``<file_dir><crop name>\\t<transcription>``
    line is written to ``label_txt``. Spaces are removed from transcriptions.

    Args:
        src_path: Path of the label file, read as UTF-8.
        label_txt: Open, writable text file object receiving the new labels.
        file_dir: Sub-directory prefix (with trailing slash), e.g. ``'train/'``.
        image_root: Prefix prepended to each image path found in ``src_path``.
        save_root: Prefix under which the crops are written.
    """
    with open(src_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (e.g. a trailing newline at EOF) carry no record.
            if not line.strip():
                continue
            print(line)
            content = line.split('\t')
            print(content[0])
            # Last path component, so paths with zero or several folders work too.
            imag_name = content[0].split('/')[-1]
            image_path = image_root + content[0]
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals a missing/unreadable file by returning None.
                print('skip unreadable image: ' + image_path)
                continue
            try:
                list_dict = json.loads(content[1])
            except json.JSONDecodeError:
                # Fall back to the quote swap only for single-quoted labels, so
                # apostrophes inside valid JSON transcriptions stay intact.
                list_dict = json.loads(content[1].replace("'", "\""))
            print(len(list_dict))
            for num, lin in enumerate(list_dict):
                print(lin)
                info = lin['transcription'].replace(" ", "")
                points = np.float32(
                    [list(map(float, item)) for item in lin['points']])
                # Build the crop name from the untouched image name each time;
                # re-assigning imag_name here would stack the index prefixes
                # ("1_0_a.jpg", "2_1_0_a.jpg", ...).
                crop_name = str(num) + "_" + imag_name
                save_path = save_root + file_dir + crop_name
                dst_img = get_rotate_crop_image(img, points)
                cv2.imwrite(save_path, dst_img)
                label_txt.write(file_dir + crop_name + '\t' + info + '\n')
# Output folders for the cropped recognition images.
os.makedirs('train_data/rec/train/', exist_ok=True)
os.makedirs('train_data/rec/val/', exist_ok=True)

# Detection label files (input) and recognition label files (output).
src_path = r"./train_data/icdar2015/text_localization/train.txt"
label_txt = r"./train_data/rec/train.txt"
src_test_path = r"./train_data/icdar2015/text_localization/val.txt"
label_test_txt = r"./train_data/rec/val.txt"

# The source labels are read as UTF-8, so write the transcriptions back as
# UTF-8 instead of the platform default encoding.
with open(label_txt, 'w', encoding='utf-8') as w_label:
    write_txt_img(src_path, w_label, 'train/')
with open(label_test_txt, 'w', encoding='utf-8') as w_label:
    write_txt_img(src_test_path, w_label, 'val/')
2、切分训练集和验证集
按照8:2的比例(train_test_split的test_size=0.2),将数据集随机切分为训练集和验证集。
# 制作数据集,将Label.txt切分为训练集和验证集
import os
import shutil
from sklearn.model_selection import train_test_split
os.makedirs('train', exist_ok=True)
os.makedirs('val', exist_ok=True)
label_txt = 'Label.txt'
with open(label_txt, 'r', encoding='gbk') as f:
    # Drop blank lines and make every record end with exactly one newline;
    # otherwise a last line without a newline is glued to the following
    # record once the shuffled lines are written out.
    txt_List = [line.rstrip('\n') + '\n' for line in f if line.strip()]
trainval_files, val_files = train_test_split(txt_List, test_size=0.2, random_state=42)


def _relocate_record(record, subset):
    """Return ``record`` with its image path rewritten to ``<subset>/<file name>``."""
    fields = record.split('\t')
    image_name = fields[0].split('/')[1]
    return subset + '/' + image_name + '\t' + fields[1].rstrip('\n') + '\n'


# Label files are written as UTF-8 explicitly, matching how the other scripts
# in this file read label files, rather than relying on the platform default.
train_list = [_relocate_record(file_Line, 'train') for file_Line in trainval_files]
with open("train.txt", "w", encoding='utf-8') as f:
    f.writelines(train_list)

val_list = [_relocate_record(file_Line, 'val') for file_Line in val_files]
with open("val.txt", "w", encoding='utf-8') as f:
    f.writelines(val_list)

# Copy each image next to its new label entry.
for subset, records in (('train', trainval_files), ('val', val_files)):
    for txt in records:
        image_name = txt.split('\t')[0]
        new_path = "./" + subset + "/" + image_name.split('/')[1]
        shutil.copy(image_name, new_path)
        print(image_name)
3、将数据集转换为LabelImg格式
将PPOCRLabel格式的数据集转为LabelImg标注的xml格式的数据集。
import os
from collections import defaultdict
import cv2
# import misc_utils as utils # pip3 install utils-misc==0.0.5 -i https://pypi.douban.com/simple/
import json
os.makedirs('./Annotations', exist_ok=True)
print('建立Annotations目录', 3)

# filename -> list of [label, xmin, ymin, xmax, ymax]
mem = defaultdict(list)

with open('Label.txt', 'r', encoding='utf8') as fp:
    # One record per non-blank line: [image path, JSON annotation list].
    s = [line.replace('\n', '').split('\t') for line in fp if line.strip()]

for record in s:
    path = record[0]
    anno = json.loads(record[1])
    print(anno)
    filename = path.split('/')[1]
    # splitext keeps dots inside the name ("a.1.jpg" -> "a.1"), so two images
    # that differ only after the first dot no longer overwrite each other.
    stem = os.path.splitext(filename)[0]

    img = cv2.imread(path)
    if img is None:
        # cv2.imread returns None when the file is missing or unreadable.
        print('skip unreadable image:', path)
        continue
    cv2.imwrite('Annotations/' + stem + '.jpg', img)
    height, width = img.shape[:2]

    # Axis-aligned bounding box of each annotated polygon, in file order
    # (indexing with ``j - 1`` used to emit the last box first).
    for box in anno:
        label = 'No'
        xs = [int(point[0]) for point in box['points']]
        ys = [int(point[1]) for point in box['points']]
        mem[filename].append([label, min(xs), min(ys), max(xs), max(ys)])

    # XML without a declaration is read as UTF-8, so write it as UTF-8.
    xml_path = os.path.join('./Annotations', stem) + '.xml'
    with open(xml_path, 'w', encoding='utf-8') as f:
        f.write(
            f"<annotation> <folder>JPEGImages</folder> <filename>{stem}.jpg</filename>"
            f" <size> <width>{width}</width> <height>{height}</height>"
            f" <depth>3</depth> </size> <segmented>0</segmented>\n")
        for label, x1, y1, x2, y2 in mem[filename]:
            f.write(
                f" <object> <name>{label}</name> <pose>Unspecified</pose>"
                f" <truncated>0</truncated> <difficult>0</difficult>"
                f" <bndbox> <xmin>{x1}</xmin> <ymin>{y1}</ymin>"
                f" <xmax>{x2}</xmax> <ymax>{y2}</ymax> </bndbox> </object>\n")
        f.write("</annotation>")
4、将PPOCRLabel格式的数据集转为DBNet训练用的icdar2015格式的数据集
import os
import json
def json_2_icdar(js_path, ic_path):
    """Convert a PPOCRLabel label file into one ICDAR2015-style txt per image.

    Each non-blank line of ``js_path`` is ``<image path>\\t<JSON annotation list>``.
    For every image a text file with the same relative path but a ``.txt``
    extension is written below ``ic_path``; each annotation becomes one line
    ``x1,y1,x2,y2,...,transcription`` with the coordinates converted to int.

    Args:
        js_path: Path of the PPOCRLabel label file, read as UTF-8.
        ic_path: Directory the txt files are written under. An empty string
            keeps them relative to the current working directory.
    """
    with open(js_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (e.g. a trailing newline at EOF) carry no record.
            if not line.strip():
                continue
            print(line)
            content = line.split('\t')
            print(content[0])
            # splitext only strips the final extension, so "./a/b.jpg" and
            # "a.b.jpg" keep their full stem.
            txt_file = os.path.splitext(content[0])[0] + '.txt'
            dst_file = os.path.join(ic_path, txt_file)
            dst_dir = os.path.dirname(dst_file)
            if dst_dir:
                os.makedirs(dst_dir, exist_ok=True)

            list_dict = json.loads(content[1])
            print(len(list_dict))
            # Write to dst_file (under ic_path); opening txt_file here would
            # ignore the destination directory altogether.
            with open(dst_file, 'w', encoding='utf-8') as file_lineinfo:
                for lin in list_dict:
                    print(lin)
                    info = lin['transcription']
                    points = [int(y) for x in lin['points'] for y in x]
                    pts = ','.join(map(str, points))
                    file_lineinfo.write(pts + ',' + info + '\n')
if __name__ == "__main__":
    # Script entry point: src_path is the label file to convert and dst_path
    # is handed to json_2_icdar as its ic_path argument.
    src_path, dst_path = r"train/Label.txt", r""
    json_2_icdar(js_path=src_path, ic_path=dst_path)
5、数据增强
对标注的数据集做旋转、高斯模糊、色彩饱和度、亮度等增强。
import json
import os
import cv2
import numpy as np
import torchvision.transforms as transforms
from torchtoolbox.transform import Cutout
from PIL import Image
from random import randint
# Data preprocessing: photometric augmentation pipelines.

# Colour-jitter + blur pair kept as a plain list.
_jitter_mild = transforms.ColorJitter(brightness=0.3, contrast=0.5, saturation=0.5)
_blur_mild = transforms.GaussianBlur(5, sigma=(0.1, 0.5))
t = [_jitter_mild, _blur_mild]

# Pipeline applied to PIL images: colour jitter, Gaussian blur, then a
# tensor round trip that hands back a PIL image.
_pipeline_steps = [
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.2),
    transforms.GaussianBlur(5, sigma=(0.1, 3.0)),
    transforms.ToTensor(),
    transforms.ToPILImage(),
]
transform = transforms.Compose(_pipeline_steps)
""" 旋转后图片返回 """
def dumpRotateImage(img, degree):  # image, angle
    """Rotate ``img`` by ``degree`` about its centre without changing its size.

    Areas uncovered by the rotation are filled with white (255, 255, 255).

    Args:
        img: Image array; only ``img.shape[:2]`` (height, width) is read here.
        degree: Rotation angle handed to ``cv2.getRotationMatrix2D``.

    Returns:
        Tuple ``(rotated image, rotation matrix)`` where the matrix is the one
        produced by ``cv2.getRotationMatrix2D`` with scale 1.
    """
    rows, cols = img.shape[:2]
    centre = (cols // 2, rows // 2)
    out_size = (cols, rows)
    white = (255, 255, 255)

    matRotation = cv2.getRotationMatrix2D(centre, degree, 1)
    imgRotation = cv2.warpAffine(img, matRotation, out_size, borderValue=white)
    return imgRotation, matRotation
os.makedirs('train', exist_ok=True)
src_path = "Label_new.txt"
d_path = 'dd.txt'
# Pixel offsets randomly added to every rotated corner coordinate.
radom_p = [-3, -2, -1, 0, 1, 2, 3, 4, 5]


def _jitter(value):
    """Return ``int(value)`` shifted by one random offset taken from ``radom_p``."""
    return int(value) + radom_p[randint(0, len(radom_p) - 1)]


# The source labels are read as UTF-8, so the augmented labels are written as
# UTF-8 too instead of the platform default encoding.
with open(d_path, 'w', encoding='utf-8') as w_label, \
        open(src_path, 'r', encoding='utf-8') as f:
    for line in f:
        # Blank lines (e.g. a trailing newline at EOF) carry no record.
        if not line.strip():
            continue
        content = line.split('\t')
        image_path = content[0]
        imag_name = image_path.split('/')[1]
        list_dict = json.loads(content[1])
        if not list_dict:
            # No annotation means no box to rotate.
            continue

        # Only the first annotation of each image is augmented.
        first = list_dict[0]
        info = first['transcription'].replace(" ", "")
        points = [list(x) for x in first['points']]
        print(points)
        box = [b for a in points for b in a]

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None when the file is missing or unreadable.
            print('skip unreadable image: ' + image_path)
            continue

        stem, ext = os.path.splitext(imag_name)
        # First four corners as homogeneous row vectors (x, y, 1).
        corners = np.array(box[:8], dtype=np.float64).reshape(4, 2)
        homogeneous = np.hstack([corners, np.ones((4, 1))])

        for i in range(5):
            imgRotation, matRotation = dumpRotateImage(image, randint(-15, 15))  # rotate
            # Apply the same rotation matrix to the corners; plain scalars are
            # indexed out of the result so int() never receives an array.
            rotated = homogeneous @ matRotation.T
            print(int(rotated[3, 0]))
            # Discard rotations that push any corner to within 5 px of the
            # top/left border, before paying for the colour transform.
            if any(int(v) < 5 for v in rotated.ravel()):
                continue

            imgRotation = Image.fromarray(cv2.cvtColor(imgRotation, cv2.COLOR_BGR2RGB))
            imgRotation = transform(imgRotation).convert('RGB')
            imgRotation = cv2.cvtColor(np.asarray(imgRotation), cv2.COLOR_RGB2BGR)

            result_info = [{
                "transcription": info,
                "points": [[_jitter(x), _jitter(y)] for x, y in rotated],
                # A real boolean serialises to JSON ``false``; the string
                # "false" would be truthy for any reader that tests the flag.
                "difficult": False,
            }]
            out_name = stem + "_" + str(i) + "_0726" + ext
            # json.dumps escapes quotes properly, so transcriptions containing
            # apostrophes or double quotes stay valid JSON.
            imag_d_path = ("train/" + out_name + "\t"
                           + json.dumps(result_info, ensure_ascii=False) + '\n')
            print(imag_d_path)
            cv2.imwrite('./train/' + out_name, imgRotation)
            w_label.write(imag_d_path)
6、删除没有标注的图片
对图片目录中的文件列表与标注文件中的图片列表求差集,将没有标注的多余图片删除。
import os
image_list = os.listdir('train/')
label_txt = "Label_new.txt"
label_list = []
# Read the labels as UTF-8 (the encoding the augmentation script uses for the
# same file); a wrong decoding of non-ASCII names would make labelled images
# look unlabelled and get them deleted.
with open(label_txt, 'r', encoding='utf-8') as label_s:
    for line in label_s:
        # Blank lines (e.g. a trailing newline at EOF) carry no record.
        if not line.strip():
            continue
        label_list.append(line.split('\t')[0].split('/')[1])

# Files present in train/ that no label line refers to.
cha_list = list(set(image_list) - set(label_list))
for img in cha_list:
    target = 'train/' + img
    # os.listdir also returns sub-directories, which os.remove cannot delete.
    if os.path.isfile(target):
        os.remove(target)
边栏推荐
- HCIP-第二天
- Lora网关节点汇聚传感器数据
- N methods of SQL optimization
- 离开页面的提示
- Open the door of programming
- MGRE, PPP, HDLC comprehensive experiment
- HCIA (network elementary comprehensive experimental exercise)
- Ogeek meetup phase I, together with cubefs, is hot
- The latest JD SMS login + silly girl robot nanny level deployment tutorial (July 24, 2022)
- Nb-iot networking communication
猜你喜欢

What is the principle of synchronized lock escalation in multithreading?

HCIP-第六天-OSPF静态大实验

Nb-iot access to cloud platform

lvs+keepalived项目实战

(CF1691D) Max GEQ Sum

Esp8266wi fi data communication

HCIP第一天静态路由综合实验

Prompt to leave the page

RS-485 bus communication application

C language - first program, print, variables and constants
随机推荐
Use of golang - sync package (waitgroup, once, mutex, rwmutex, cond, pool, map)
Ogeek meetup phase I, together with cubefs, is hot
C language - first program, print, variables and constants
MySQL course 2. Various queries of tables
(super detailed version, don't know to comment at any time) codeforces round 804 (Div. 2) C the third problem
记录HandsomeBlog的star用户
Prompt to leave the page
光光光仔的CSDN之旅
Nb-iot networking communication
离开页面的提示
js中的数组方法和循环
通过ensp让静态路由实现全网可达
Self introduction and planning about programming
About unsafe problems such as fopen and strError encountered in vs2022 or advanced version running environment
Open the door of programming
[C language] factorial implementation
【降维打击,带你深度学习CPU(上)】
[C language] relevant distinction between strlen and sizeof
Detailed source code of golang bufio reader
RIP路由信息协议-拓扑实验