当前位置：网站首页>Anchorgenerator for mmdet line by line interpretation

AnchorGenerator in mmdet: a line-by-line walkthrough

2022-06-30 08:41:00 【Wu lele~】

Table of contents

Preface
1. Generating base_anchors
2. Generating grid_anchors
3. Introduction to valid_flags
Summary

Preface

This article walks through the AnchorGenerator class defined in mmdet/core/anchor/anchor_generator.py, using the RetinaNet configuration below as the running example.

# RetinaNet anchor settings consumed by AnchorGenerator.
anchor_generator_cfg = {
    'type': 'AnchorGenerator',
    # Base scale; multiplied by each per-octave factor to get the scales.
    'octave_base_scale': 4,
    # Number of scales generated per octave (3 scales per base size).
    'scales_per_octave': 3,
    # Height / width aspect ratios of the anchors.
    'ratios': [0.5, 1.0, 2.0],
    # Stride of each feature level relative to the input image
    # (i.e. the down-sampling rate of that level).
    'strides': [8, 16, 32, 64, 128],
}

1. Generating base_anchors

The base_anchors are the 9 basic anchors (3 ratios × 3 scales) that the gen_base_anchors method produces for each feature level when the AnchorGenerator class is initialized. These anchors are expressed in the pixel scale of the original image.

@ANCHOR_GENERATORS.register_module()
class AnchorGenerator(object):
    """Anchor generator for 2D anchor-based detectors.

    Args:
        strides: Stride of each feature level, e.g. ``[8, 16, 32, 64, 128]``.
            Each entry is normalised to a pair with ``_pair``.
        ratios: Height / width aspect ratios of the anchors,
            e.g. ``[0.5, 1.0, 2.0]``.
        scales: Explicit anchor scales. Mutually exclusive with
            ``octave_base_scale`` + ``scales_per_octave``.
        base_sizes: Base anchor size of each level. When ``None`` the
            smaller component of each stride pair is used.
        scale_major: Controls the ordering of the generated base anchors
            (see ``gen_single_level_base_anchors``).
        octave_base_scale: Base scale of the octave, e.g. ``4``.
        scales_per_octave: Number of scales per octave, e.g. ``3``.
        centers: Optional per-level ``(x, y)`` centres of the base anchors.
        center_offset: Centre of the base anchors as a fraction of the base
            size; only used for levels without an explicit centre.
    """

    def __init__(self,
                 strides,
                 ratios,
                 scales=None,
                 base_sizes=None,
                 scale_major=True,
                 octave_base_scale=None,
                 scales_per_octave=None,
                 centers=None,
                 center_offset=0.):
        # Normalise strides to pairs, e.g. [8, 16] -> [(8, 8), (16, 16)].
        self.strides = [_pair(stride) for stride in strides]
        # By default the base size of a level is the smaller component of
        # its stride pair, e.g. [8, 16, 32, 64, 128].
        self.base_sizes = [min(stride) for stride in self.strides
                           ] if base_sizes is None else base_sizes
        # Exactly one of the two ways of specifying scales must be used:
        # either `scales`, or `octave_base_scale` with `scales_per_octave`.
        assert ((octave_base_scale is not None
                and scales_per_octave is not None) ^ (scales is not None)), \
            'scales and octave_base_scale with scales_per_octave cannot' \
            ' be set at the same time'
        if scales is not None:
            self.scales = torch.Tensor(scales)
        elif octave_base_scale is not None and scales_per_octave is not None:
            # octave_base_scale * [2**0, 2**(1/3), 2**(2/3)]
            # ~= [4.00, 5.04, 6.35] for octave_base_scale=4 and
            # scales_per_octave=3.
            octave_scales = np.array(
                [2**(i / scales_per_octave) for i in range(scales_per_octave)])
            scales = octave_scales * octave_base_scale
            self.scales = torch.Tensor(scales)

        # Everything below is read by gen_base_anchors() and
        # gen_single_level_base_anchors(), so it has to be stored before
        # the base anchors are generated.
        self.octave_base_scale = octave_base_scale
        self.scales_per_octave = scales_per_octave
        # Stored as a tensor because torch.sqrt is applied to the ratios.
        self.ratios = torch.Tensor(ratios)
        self.scale_major = scale_major
        self.centers = centers
        self.center_offset = center_offset
        # One tensor of base anchors per feature level.
        self.base_anchors = self.gen_base_anchors()

    @property
    def num_levels(self):
        """int: Number of feature levels, i.e. the number of strides."""
        return len(self.strides)

    @property
    def num_base_anchors(self):
        """list[int]: Number of base anchors of each feature level."""
        return [base_anchors.size(0) for base_anchors in self.base_anchors]

Now let's take a look at the gen_base_anchors method:

    def gen_base_anchors(self):
        """Generate base anchors Returns: list(torch.Tensor): Base anchors of a feature grid in multiple feature levels. """
        multi_level_base_anchors = []               #  Storing five characteristic graphs base_anchors
        for i, base_size in enumerate(self.base_sizes):
            center = None
            if self.centers is not None:
                center = self.centers[i]
            multi_level_base_anchors.append(
                self.gen_single_level_base_anchors( #  Call the... Of the current characteristic graph base_anchors
                    base_size,                      # 8 / 16 / 32 /64 /128
                    scales=self.scales,             # [4,5,6]
                    ratios=self.ratios,             # [0.5,1,2]
                    center=center))
        return multi_level_base_anchors

    def gen_single_level_base_anchors(self,
                                      base_size,     # 8
                                      scales,        # [4,5,6]
                                      ratios,        # [0.5,1,2]
                                      center=None):

        w = base_size   # w = 8
        h = base_size   # h = 8
        if center is None:
            x_center = self.center_offset * w   # 0
            y_center = self.center_offset * h   # 0
        else:
            x_center, y_center = center
		#  Get... Separately 9 Seed width and height 
        h_ratios = torch.sqrt(ratios)           #  The square root of a high proportion 
        w_ratios = 1 / h_ratios                 
        if self.scale_major:
        	# 8 * ([3,1]) * ([1,3]) = 9 individual w
            ws = (w * w_ratios[:, None] * scales[None, :]).view(-1)# [strid * w_ratio * scales]
            hs = (h * h_ratios[:, None] * scales[None, :]).view(-1)
        else:
            ws = (w * scales[:, None] * w_ratios[None, :]).view(-1)
            hs = (h * scales[:, None] * h_ratios[None, :]).view(-1)

        #  take [cx,cy,w,h] --> [xmin, ymin, xmax,ymax]
        base_anchors = [
            x_center - 0.5 * ws, y_center - 0.5 * hs, x_center + 0.5 * ws,
            y_center + 0.5 * hs
        ]
        base_anchors = torch.stack(base_anchors, dim=-1)  #  The stack 9 individual anchor.
 
        return base_anchors

In effect, the code above does what the following figure shows: for each stride (base size), 3 scales × 3 ratios give 9 base anchors.
Insert picture description here

2. Generating grid_anchors

Once the base anchors have been generated, each of them has to be replicated at every location of the feature map. This is implemented by the grid_anchors method:

    def grid_anchors(self, featmap_sizes, device='cuda'):
        assert self.num_levels == len(featmap_sizes)
        multi_level_anchors = []
        for i in range(self.num_levels):
            anchors = self.single_level_grid_anchors(    #  The method of a single characteristic graph is called internally 
                self.base_anchors[i].to(device),
                featmap_sizes[i],
                self.strides[i],
                device=device)
            multi_level_anchors.append(anchors)
        return multi_level_anchors

Here is the single_level_grid_anchors method, together with the _meshgrid helper it uses:

    def _meshgrid(self, x, y, row_major=True):
        """Generate mesh grid of x and y Args: x (torch.Tensor): Grids of x dimension. y (torch.Tensor): Grids of y dimension. row_major (bool, optional): Whether to return y grids first. Defaults to True. Returns: tuple[torch.Tensor]: The mesh grids of x and y. """
        xx = x.repeat(len(y))  #  take x repeat len(y) Time . [0,1,2,0,1,2]
        yy = y.view(-1, 1).repeat(1, len(x)).view(-1) # [0,0,0,1,1,1]
        if row_major:
            return xx, yy
        else:
            return yy, xx
            
    def single_level_grid_anchors(self,
                                  base_anchors,
                                  featmap_size,
                                  stride=(16, 16),
                                  device='cuda'):
        """Generate grid anchors of a single level. Note: This function is usually called by method ``self.grid_anchors``. Args: base_anchors (torch.Tensor): The base anchors of a feature grid. featmap_size (tuple[int]): Size of the feature maps. stride (tuple[int], optional): Stride of the feature map. Defaults to (16, 16). device (str, optional): Device the tensor will be put on. Defaults to 'cuda'. Returns: torch.Tensor: Anchors in the overall feature maps. """
        feat_h, feat_w = featmap_size  #  Get the width and height of the current feature map 
        #  because *stride, Therefore, the offset from the original image is generated ：[0, stride[0], 2*stride[0]...]
        shift_x = torch.arange(0, feat_w, device=device) * stride[0]  
        shift_y = torch.arange(0, feat_h, device=device) * stride[1]
        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)  # Combine the binary coordinates in the generated image (xx,yy)
        shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) #  The stack 
        shifts = shifts.type_as(base_anchors)
        # first feat_w elements correspond to the first row of shifts
        # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
        # shifted anchors (K, A, 4), reshape to (K*A, 4)
		# base_anchor yes [x,y,x,y] Format , Therefore, the offset can be added directly .
        all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
        all_anchors = all_anchors.view(-1, 4)
        return all_anchors

3. Introduction to valid_flags

In short, this method does the following: during batched training, images are usually padded, and the padding shows up as black borders. Anchors laid out over the whole feature map also fall on the padded region, and those anchors should be ignored. So this function gives every anchor a flag: True if the anchor lies at a valid pixel position, and False otherwise.

    def valid_flags(self, featmap_sizes, pad_shape, device='cuda'):
        """  Input the original dimension and pad Rear size  Return: list(torch.Tensor): Return one and anchor Equal in quantity bool Type tensor  """
        assert self.num_levels == len(featmap_sizes)
        multi_level_flags = []
        for i in range(self.num_levels):
            anchor_stride = self.strides[i]
            feat_h, feat_w = featmap_sizes[i]
            h, w = pad_shape[:2]
            valid_feat_h = min(int(np.ceil(h / anchor_stride[0])), feat_h) #  Get valid width and height 
            valid_feat_w = min(int(np.ceil(w / anchor_stride[1])), feat_w)
            flags = self.single_level_valid_flags((feat_h, feat_w),        #  Traverse the characteristic graph of each layer 
                                                  (valid_feat_h, valid_feat_w),
                                                  self.num_base_anchors[i],# 9 individual 
                                                  device=device)
            multi_level_flags.append(flags)
        return multi_level_flags

    def single_level_valid_flags(self,
                                 featmap_size,
                                 valid_size,
                                 num_base_anchors,
                                 device='cuda'):
        """Generate the valid flags of anchor in a single feature map Args: featmap_size (tuple[int]):  Original feature map  valid_size (tuple[int]): pad Rear effective dimension  num_base_anchors (int): 9 device (str, optional): Device where the flags will be put on. Defaults to 'cuda'. Returns: torch.Tensor: The valid flags of each anchor in a single level feature map. """
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device) #  The assignment is FALSE
        valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)
        valid_x[:valid_w] = 1   #  take valid_w The previous assignment is Ture.
        valid_y[:valid_h] = 1
        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) #  Also get the two-dimensional coordinates 
        valid = valid_xx & valid_yy                           #  Only for Ture The position of is the effective range 
        valid = valid[:, None].expand(valid.size(0),          #  Expand it into a foundation anchor Of 9 Times 
                                      num_base_anchors).contiguous().view(-1)
        return valid