当前位置:网站首页>Summary of dataset operations in ppocrlabel format.
Summary of dataset operations for the PPOCRLabel format
2022-07-27 02:35:00 【AI Hao】
1. Generate recognition data
Read the four corner points of each target from a PPOCRLabel-format dataset, then use getPerspectiveTransform and warpPerspective to cut out and rectify each text region, producing a recognition dataset.
import json
import os
import numpy as np
import cv2
def get_rotate_crop_image(img, points):
    """Cut a quadrilateral region out of ``img`` and rectify it.

    The four corners are mapped onto an axis-aligned rectangle with a
    perspective transform. The rectangle's width and height are the longer
    of each pair of opposite edges of the quadrilateral.

    Args:
        img: Source image array handed to ``cv2.warpPerspective``.
        points: Four ``(x, y)`` corners in the order top-left, top-right,
            bottom-right, bottom-left. Anything convertible to a 4x2
            float32 array is accepted.

    Returns:
        The rectified crop. A crop whose height is at least 1.5 times its
        width is rotated by 90 degrees with ``np.rot90`` before returning.

    Raises:
        ValueError: If ``points`` is not 4x2, or the box collapses to zero
            width or height.
    """
    # cv2.getPerspectiveTransform needs float32 input; converting here also
    # lets callers pass plain nested lists.
    points = np.asarray(points, dtype=np.float32)
    if points.shape != (4, 2):
        raise ValueError("shape of points must be 4*2")

    img_crop_width = int(
        max(
            np.linalg.norm(points[0] - points[1]),
            np.linalg.norm(points[2] - points[3])))
    img_crop_height = int(
        max(
            np.linalg.norm(points[0] - points[3]),
            np.linalg.norm(points[1] - points[2])))
    if img_crop_width == 0 or img_crop_height == 0:
        # A degenerate box cannot be warped and would divide by zero below.
        raise ValueError("points describe a box with zero width or height")

    pts_std = np.float32([[0, 0], [img_crop_width, 0],
                          [img_crop_width, img_crop_height],
                          [0, img_crop_height]])
    M = cv2.getPerspectiveTransform(points, pts_std)
    dst_img = cv2.warpPerspective(
        img,
        M, (img_crop_width, img_crop_height),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC)

    dst_img_height, dst_img_width = dst_img.shape[0:2]
    # NOTE(review): tall crops are presumably vertical text being laid
    # flat for the recogniser; confirm against the training setup.
    if dst_img_height * 1.0 / dst_img_width >= 1.5:
        dst_img = np.rot90(dst_img)
    return dst_img
def write_txt_img(src_path, label_txt, file_dir,
                  image_root='./train_data/icdar2015/text_localization/',
                  rec_root='./train_data/rec/'):
    """Crop every annotated text box and record it in a recognition label file.

    Each non-blank line of ``src_path`` is ``<image path>\\t<JSON list>``,
    where every list entry carries ``transcription`` and ``points``. For each
    entry the box is cropped with ``get_rotate_crop_image``, saved under
    ``rec_root + file_dir``, and a ``<relative crop path>\\t<text>`` line is
    written to ``label_txt``.

    Args:
        src_path: Path of the UTF-8 detection label file to read.
        label_txt: Writable text file object that receives the label lines.
        file_dir: Sub-directory prefix (e.g. ``'train/'``) used both for the
            saved crop and for the path written to the label line.
        image_root: Directory that the image paths in ``src_path`` are
            relative to.
        rec_root: Directory under which the crops are saved.
    """
    with open(src_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                # Blank lines (often a trailing one) carry no annotation.
                continue
            print(line)
            content = line.split('\t')
            print(content[0])
            imag_name = os.path.basename(content[0])
            image_path = os.path.join(image_root, content[0])
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread returns None instead of raising on a bad path.
                print('skip unreadable image: ' + image_path)
                continue
            try:
                list_dict = json.loads(content[1])
            except json.JSONDecodeError:
                # Fall back for labels written with single quotes. Trying
                # strict JSON first keeps apostrophes inside valid
                # transcriptions intact.
                list_dict = json.loads(content[1].replace("'", "\""))
            print(len(list_dict))
            for num, lin in enumerate(list_dict):
                print(lin)
                info = lin['transcription'].replace(" ", "")
                points = np.float32(
                    [[float(value) for value in point] for point in lin['points']])
                # Build the crop name from the original image name each time;
                # reusing the previous crop name piled the prefixes up
                # ("1_0_a.jpg", "2_1_0_a.jpg", ...).
                crop_name = str(num) + "_" + imag_name
                save_path = os.path.join(rec_root, file_dir + crop_name)
                dst_img = get_rotate_crop_image(img, points)
                cv2.imwrite(save_path, dst_img)
                label_txt.write(file_dir + crop_name + '\t' + info + '\n')
# Output directories for the cropped recognition images.
os.makedirs('train_data/rec/train/', exist_ok=True)
os.makedirs('train_data/rec/val/', exist_ok=True)

src_path = r"./train_data/icdar2015/text_localization/train.txt"
label_txt = r"./train_data/rec/train.txt"
src_test_path = r"./train_data/icdar2015/text_localization/val.txt"
label_test_txt = r"./train_data/rec/val.txt"

# The source label files are read as UTF-8, so write the generated label
# files as UTF-8 too; the platform default can fail on non-ASCII text.
with open(label_txt, 'w', encoding='utf-8') as w_label:
    write_txt_img(src_path, w_label, 'train/')
with open(label_test_txt, 'w', encoding='utf-8') as w_label:
    write_txt_img(src_test_path, w_label, 'val/')
2. Split into training and validation sets
Split the dataset into a training set and a validation set in a fixed ratio.
# Build the datasets: split Label.txt into a training set and a validation set
import os
import shutil
from sklearn.model_selection import train_test_split
os.makedirs('train', exist_ok=True)
os.makedirs('val', exist_ok=True)
label_txt = 'Label.txt'
# NOTE(review): the file is decoded as GBK while the split files below are
# written with the platform default encoding; confirm both match the
# encoding the labelling tool and the training code expect.
with open(label_txt, 'r', encoding='gbk') as f:
    # Drop blank lines so a trailing newline does not become a sample.
    txt_List = [line for line in f if line.strip()]
trainval_files, val_files = train_test_split(txt_List, test_size=0.2, random_state=42)


def _export_split(lines, subset):
    """Write ``<subset>.txt`` and copy the listed images into ``<subset>/``.

    Each line is ``<image path>\\t<annotation>``. The image path is rewritten
    to ``<subset>/<file name>`` in the label file, and the image itself is
    copied from its original path into the ``<subset>`` directory.
    """
    relabelled = []
    sources = []
    for file_Line in lines:
        image_path, annotation = file_Line.split('\t', 1)
        image_name = os.path.basename(image_path)
        if not annotation.endswith('\n'):
            # The last line of Label.txt may lack a newline; once shuffled
            # into the middle it would merge with the next record.
            annotation += '\n'
        relabelled.append(subset + '/' + image_name + '\t' + annotation)
        sources.append((image_path, "./" + subset + "/" + image_name))
    with open(subset + ".txt", "w") as out:
        out.writelines(relabelled)
    for image_path, new_path in sources:
        shutil.copy(image_path, new_path)
        print(image_path)


_export_split(trainval_files, 'train')
_export_split(val_files, 'val')
3. Convert the dataset to LabelImg format
Convert a PPOCRLabel-format dataset into LabelImg-style XML annotation files.
import os
from collections import defaultdict
import cv2
# import misc_utils as utils # pip3 install utils-misc==0.0.5 -i https://pypi.douban.com/simple/
import json
os.makedirs('./Annotations', exist_ok=True)
print(' establish Annotations Catalog ', 3)

# Maps image file name -> list of [label, xmin, ymin, xmax, ymax].
mem = defaultdict(list)
with open('Label.txt', 'r', encoding='utf8') as fp:
    # Each kept row is [image path, JSON annotation list]; blank lines are skipped.
    s = [row.rstrip('\n').split('\t') for row in fp if row.strip()]

for fields in s:
    path = fields[0]
    anno = json.loads(fields[1])
    print(anno)
    filename = os.path.basename(path)
    # splitext keeps dots inside the name ("a.b.jpg" -> "a.b"), so two such
    # images do not collapse onto the same output file.
    stem = os.path.splitext(filename)[0]
    img = cv2.imread(path)
    if img is None:
        # cv2.imread returns None instead of raising on a bad path.
        print('skip unreadable image: ' + path)
        continue
    # A JPEG copy of the image is stored next to its XML annotation.
    cv2.imwrite('Annotations/' + stem + '.jpg', img)
    height, width = img.shape[:2]

    for item in anno:
        label = 'No'
        # Axis-aligned bounding box enclosing the annotated polygon.
        xs = [int(point[0]) for point in item['points']]
        ys = [int(point[1]) for point in item['points']]
        mem[filename].append([label, min(xs), min(ys), max(xs), max(ys)])

    # XML without a declaration is read as UTF-8, so write it as UTF-8.
    with open(os.path.join('./Annotations', stem) + '.xml', 'w', encoding='utf-8') as f:
        f.write(
            f"<annotation> <folder>JPEGImages</folder> <filename>{stem}.jpg</filename> "
            f"<size> <width>{width}</width> <height>{height}</height> <depth>3</depth> </size> "
            "<segmented>0</segmented>\n"
        )
        for label, x1, y1, x2, y2 in mem[filename]:
            f.write(
                f" <object> <name>{label}</name> <pose>Unspecified</pose> "
                "<truncated>0</truncated> <difficult>0</difficult> "
                f"<bndbox> <xmin>{x1}</xmin> <ymin>{y1}</ymin> "
                f"<xmax>{x2}</xmax> <ymax>{y2}</ymax> </bndbox> </object>\n"
            )
        f.write("</annotation>")
4. Convert a PPOCRLabel-format dataset to the ICDAR2015 format used for DBNet training
import os
import json
def json_2_icdar(js_path, ic_path):
    """Convert a PPOCRLabel label file into per-image ICDAR2015 text files.

    Each non-blank line of ``js_path`` is ``<image path>\\t<JSON list>``. For
    every image one ``.txt`` file is written whose lines are the box
    coordinates (truncated to int, flattened in order), followed by the
    transcription, all comma-separated.

    Args:
        js_path: Path of the UTF-8 PPOCRLabel label file.
        ic_path: Output root. The image's relative path, with its extension
            replaced by ``.txt``, is joined onto it. An empty string resolves
            the outputs relative to the working directory.
    """
    with open(js_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            print(line)
            content = line.split('\t')
            print(content[0])
            # splitext only strips the real extension, so "./train/a.jpg"
            # and dotted names keep their full stem.
            txt_file = os.path.splitext(content[0])[0] + '.txt'
            # Write under ic_path; previously the file was opened at
            # txt_file and the output directory was ignored.
            dst_file = os.path.join(ic_path, txt_file)
            dst_dir = os.path.dirname(dst_file)
            if dst_dir:
                os.makedirs(dst_dir, exist_ok=True)
            list_dict = json.loads(content[1])
            print(len(list_dict))
            with open(dst_file, 'w', encoding='utf-8') as file_lineinfo:
                for lin in list_dict:
                    print(lin)
                    info = lin['transcription']
                    points = [int(y) for x in lin['points'] for y in x]
                    pts = ','.join(map(str, points))
                    file_lineinfo.write(pts + ',' + info + '\n')
if __name__ == "__main__":
    # Convert the labels in train/Label.txt. The destination root is empty,
    # so output paths are resolved relative to the working directory.
    json_2_icdar(js_path=r"train/Label.txt", ic_path=r"")
5. Data augmentation
Augment the labelled dataset with rotation, Gaussian blur, colour saturation, brightness and similar transformations.
import json
import os
import cv2
import numpy as np
import torchvision.transforms as transforms
from torchtoolbox.transform import Cutout
from PIL import Image
from random import randint
# Photometric augmentation pipelines.

# Milder colour-jitter / blur pair. It is defined here but not referenced by
# the rest of this script.
t = [
    transforms.ColorJitter(brightness=0.3, contrast=0.5, saturation=0.5),
    transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 0.5)),
]

# Pipeline applied to each rotated PIL image: colour jitter, Gaussian blur,
# then a tensor round trip that hands back a PIL image.
_color_jitter = transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.2)
_gaussian_blur = transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 3.0))
transform = transforms.Compose(
    [_color_jitter, _gaussian_blur, transforms.ToTensor(), transforms.ToPILImage()]
)
""" After rotation, the picture returns """
def dumpRotateImage(img, degree):
    """Rotate ``img`` about its centre and return it with the matrix used.

    Args:
        img: Image array; its first two ``shape`` entries are read as
            height and width.
        degree: Rotation angle handed to ``cv2.getRotationMatrix2D``.

    Returns:
        ``(rotated_image, rotation_matrix)``. The output canvas keeps the
        input's width and height, and pixels uncovered by the rotation are
        filled with ``(255, 255, 255)``.
    """
    rows, cols = img.shape[:2]
    centre = (cols // 2, rows // 2)
    rotation = cv2.getRotationMatrix2D(centre, degree, 1)
    rotated = cv2.warpAffine(img, rotation, (cols, rows), borderValue=(255, 255, 255))
    return rotated, rotation
os.makedirs('train', exist_ok=True)
src_path = "Label_new.txt"
d_path = 'dd.txt'
# Pixel offsets added at random to every augmented corner coordinate.
radom_p = [-3, -2, -1, 0, 1, 2, 3, 4, 5]


def _jitter(value):
    """Return ``value`` truncated to int plus a random offset from ``radom_p``."""
    return int(value) + radom_p[randint(0, len(radom_p) - 1)]


# Both files are UTF-8 so non-ASCII transcriptions survive the round trip.
with open(d_path, 'w', encoding='utf-8') as w_label, \
        open(src_path, 'r', encoding='utf-8') as f:
    for line in f:
        if not line.strip():
            continue
        content = line.split('\t')
        image_path = content[0]
        imag_name = os.path.basename(image_path)
        stem, ext = os.path.splitext(imag_name)
        list_dict = json.loads(content[1])
        if not list_dict:
            # Nothing annotated on this image, so there is no box to transform.
            continue

        # Only the first annotation of each image is augmented.
        first = list_dict[0]
        info = first['transcription'].replace(" ", "")
        points = [list(x) for x in first['points']]
        print(points)

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None instead of raising on a bad path.
            print('skip unreadable image: ' + image_path)
            continue

        # Homogeneous corner coordinates, one column per corner (3 x 4).
        corners = np.array([[x, y, 1] for x, y in points[:4]], dtype=np.float64).T

        for i in range(5):
            imgRotation, matRotation = dumpRotateImage(image, randint(-15, 15))  # rotate
            # Apply the same affine matrix to the corners (result is 4 x 2).
            # Plain scalars are used from here on because int() on a
            # 1-element array is deprecated in NumPy.
            rotated = np.dot(matRotation, corners).T
            print(int(rotated[3][0]))
            # Drop samples whose box comes within 5 px of the top/left
            # border. Checked before the colour/blur pass to avoid wasted work.
            if any(int(value) < 5 for value in rotated.flat):
                continue

            imgRotation = Image.fromarray(cv2.cvtColor(imgRotation, cv2.COLOR_BGR2RGB))
            imgRotation = transform(imgRotation).convert('RGB')
            imgRotation = cv2.cvtColor(np.asarray(imgRotation), cv2.COLOR_RGB2BGR)

            # NOTE(review): "difficult" is kept as the string "false" to match
            # the existing output; confirm whether a JSON boolean is expected.
            result_info = [{
                "transcription": info,
                "points": [[_jitter(x), _jitter(y)] for x, y in rotated],
                "difficult": "false"}]
            out_name = stem + "_" + str(i) + "_0726" + ext
            # json.dumps emits valid JSON even when the transcription or the
            # file name contains quote characters.
            imag_d_path = ("train/" + out_name + "\t"
                           + json.dumps(result_info, ensure_ascii=False) + '\n')
            print(imag_d_path)
            cv2.imwrite('./train/' + out_name, imgRotation)
            w_label.write(imag_d_path)
6. Delete unlabelled images
Take the difference between the image list and the labelled entries, then delete the surplus images.
import os
image_list = os.listdir('train/')
label_txt = "Label_new.txt"
# Decode as UTF-8, the encoding the augmentation script uses for this file.
# A mis-decoded non-ASCII name would not match its file on disk, and that
# labelled image would then be deleted.
with open(label_txt, 'r', encoding='utf-8') as label_s:
    label_list = [
        os.path.basename(line.split('\t')[0])
        for line in label_s
        if line.strip()
    ]
# Files present in train/ that no label line refers to.
cha_list = list(set(image_list) - set(label_list))
for img in cha_list:
    target = os.path.join('train', img)
    # Only remove regular files; os.remove cannot delete a directory.
    if os.path.isfile(target):
        os.remove(target)
边栏推荐
猜你喜欢

Witness that the "decoding 2022 strong star of China's network security" is about to set sail
![[do you know cache - fully understand cache]](/img/65/df29f6f1ff6d0d119da4a3971db0fc.png)
[do you know cache - fully understand cache]

Record the nth SQL exception

excel整行删除,图片一起删除
![Today, let's talk about escape characters [cute new version]](/img/8a/5d60d362c5de42fac0b9abd0754241.png)
Today, let's talk about escape characters [cute new version]

【你了解Cache吗——全面理解高速缓冲存储器】

在腾讯测试岗干了5年,7月无情被辞,想给还在划水的兄弟提个醒.....
![[draw sherpinski triangle in C language]](/img/e6/9d1d088d1c7675c23725443000329b.png)
[draw sherpinski triangle in C language]

通过ensp让静态路由实现全网可达

C language - characters and strings, arithmetic operators, type conversions
随机推荐
Record the nth SQL exception
How to judge whether a number is odd or even?
[enchanting interpretation, 15 minutes let you thoroughly learn how to use the stack!!!]
Sort the three integers from large to small (introduce various methods in detail)
使用注解方式实现 Redis 分布式锁
JVM interview questions (necessary for interview)
【用C语言绘制直角坐标系】
[brother Yang takes you to play with the linear table (I) - sequence table]
Handsomeforum Learning Forum
[C language programming] branch structure
HCIP oSPF知识总结
NAT network address conversion experiment
The latest multi-threaded & highly concurrent learning materials, interview confidence
OSPF summary (mind map)
N methods of SQL optimization
Wechat applet: user wechat login process (attached: flow chart + source code)
Hcip day 3 Wan topology experiment
Error handling in golang
Redis安装及运行(linux)
Prometheus 运维工具 Promtool (三) Debug 功能