当前位置：网站首页>Introduction to mmcv common APIs

Introduction to mmcv common APIs

2022-06-30 05:13:00 【Wu lele~】

List of articles

Preface
1、 Pre Basics
2、mmcv
summary

Preface

This article is mainly about mmdet Often used in mmcv some API Make an introduction .

1、 Pre Basics

mmcv Contains a large number of image processing functions , The two most commonly used libraries are cv2 and pillow. therefore , Commonly used for these two libraries API Make a brief introduction .

1.1. Read images

import cv2
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt

# h>w Image : (1133, 800, 3)
img_path = '/home/wujian/mmdet-lap/data/coco/val2017/000001.jpg'

img = cv2.imread(img_path)
h,w = img.shape[:2]
print('h:', h, 'w:',w)

img = Image.open(img_path)
w,h = img.size
print('w:', w, 'h:',h)

Be careful cv2 What is returned is the... Of the image h and w, and pil What is returned is the... Of the image w and h！！

1.2. cv2 and pil Reciprocal transformation

import cv2
import numpy as np
from PIL import Image
# cv2 --> pil
img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
# pil --> cv2
image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

1.3. Turn into pil Visualizing

Generally in IDE Code in , So turn to PIL More convenient for visualization , Post down visualization pil Image code ：

from PIL import Image
import matplotlib.pyplot as plt

img = open(img_path)
plt.imshow(img)
plt.show()

1.4. cv2 and pil Save image

Just note that the absolute path is saved .

cv2.imwrite('abs_path', img) # img It is the classics. cv2.imread Read the 
img.save('abs_path')      # img  It is the classics.  Image.open() Read the

2、mmcv

Post here mmdet Data set processing fields commonly used in ：

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]

2.1. Transform image size Resize

Two pictures are selected respectively h>w and w>h The image of Resize Transformation ,mmdet The middle transform operation is to make the smaller side become the specified side , Then on the other side scale The zoom . Of course , After the transformation and the original image h and w The size order of does not change .

from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from mmcv.image import imrescale

# h>w Image : (1133, 800, 3), Visualize the first image 
img_path = '/home/wujian/mmdet-lap/data/coco/val2017/000001.jpg'
img = cv2.imread(img_path)
h,w = img.shape[:2]

img, new_scale = imrescale(img, scale=(1333,800), return_scale= True)
print(img.shape)
# cv2 --> pil
img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.imshow(img)
plt.show()

# w>h Image  :(800, 1067, 3),  Visualize the second image 
img_path = '/home/wujian/mmdet-lap/data/coco/val2017/000003.jpg'
img = cv2.imread(img_path)
h,w = img.shape[:2]

img, new_scale = imrescale(img, scale=(1333,800), return_scale= True)
print(img.shape)
# cv2 --> pil
img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

plt.imshow(img)
plt.show()

Insert picture description here

2.2. Fill in the image

stay Resize On the basis of , Pad The operation is to fill the width and height so that both sides become 32 Multiple . Post the total code ：

import cv2
from PIL import Image, ImageDraw
import matplotlib.pyplot as plt
from mmcv.image import imrescale

# h>w Image : (1133, 800, 3)
img_path = '/home/wujian/mmdet-lap/data/coco/val2017/000001.jpg'
img = cv2.imread(img_path)
h,w = img.shape[:2]

img, new_scale = imrescale(img, scale=(1333,800), return_scale= True)
print(img.shape)
# cv2 --> pil
img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.imshow(img)
plt.show()

#pad
from mmcv.image import impad_to_multiple
import numpy as np
# pil --> cv2
image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
pad_img = impad_to_multiple(image, divisor=32, pad_val= 0)
print(pad_img.shape)
pad_img = Image.fromarray(cv2.cvtColor(pad_img, cv2.COLOR_BGR2RGB))
plt.imshow(pad_img)
plt.show()


# w>h Image  :(800, 1067, 3)
img_path = '/home/wujian/mmdet-lap/data/coco/val2017/000003.jpg'
img = cv2.imread(img_path)
h,w = img.shape[:2]

img, new_scale = imrescale(img, scale=(1333,800), return_scale= True)
print(img.shape)
# cv2 --> pil
img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

plt.imshow(img)
plt.show()

#pad
from mmcv.image import impad_to_multiple
import numpy as np
# pil --> cv2
image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
pad_img = impad_to_multiple(image, divisor=32, pad_val= 0)
print(pad_img.shape)
pad_img = Image.fromarray(cv2.cvtColor(pad_img, cv2.COLOR_BGR2RGB))
plt.imshow(pad_img)
plt.show()