当前位置:网站首页>12.物体检测Mask-Rcnn
12.物体检测Mask-Rcnn
2022-06-28 23:56:00 【C--G】
资源下载
https://github.com/matterport/Mask_RCNN


- balloon.py测试



理论讲解
整体框架

FPN层




# 处理残差模块
if callable(config.BACKBONE):
_, C2, C3, C4, C5 = config.BACKBONE(input_image, stage5=True,
train_bn=config.TRAIN_BN)
else:
_, C2, C3, C4, C5 = resnet_graph(input_image, config.BACKBONE,
stage5=True, train_bn=config.TRAIN_BN)
# 上采样,相加
# Top-down Layers
# TODO: add assert to varify feature map sizes match what's in config
P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c5p5')(C5)
P4 = KL.Add(name="fpn_p4add")([
KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c4p4')(C4)])
P3 = KL.Add(name="fpn_p3add")([
KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c3p3')(C3)])
P2 = KL.Add(name="fpn_p2add")([
KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c2p2')(C2)])
# p2到p5采用3x3 卷积,p6最大池化
# Attach 3x3 conv to all P layers to get the final feature maps.
P2 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p2")(P2)
P3 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p3")(P3)
P4 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p4")(P4)
P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p5")(P5)
# P6 is used for the 5th anchor scale in RPN. Generated by
# subsampling from P5 with stride of 2.
P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)
# Note that P6 is used in RPN, but not in the classifier heads.
rpn_feature_maps = [P2, P3, P4, P5, P6]
mrcnn_feature_maps = [P2, P3, P4, P5]
generate_pyramid_anchors(候选框生成)

def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
anchor_stride):
"""Generate anchors at different levels of a feature pyramid. Each scale is associated with a level of the pyramid, but each ratio is used in all levels of the pyramid. Returns: anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted with the same order of the given scales. So, anchors of scale[0] come first, then anchors of scale[1], and so on. """
# Anchors
# [anchor_count, (y1, x1, y2, x2)]
anchors = []
for i in range(len(scales)):
anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
feature_strides[i], anchor_stride))
return np.concatenate(anchors, axis=0)
def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
""" scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] shape: [height, width] spatial shape of the feature map over which to generate anchors. feature_stride: Stride of the feature map relative to the image in pixels. anchor_stride: Stride of anchors on the feature map. For example, if the value is 2 then generate anchors for every other feature map pixel. """
# Get all combinations of scales and ratios
scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
scales = scales.flatten()
ratios = ratios.flatten()
# Enumerate heights and widths from scales and ratios
heights = scales / np.sqrt(ratios)
widths = scales * np.sqrt(ratios)
# Enumerate shifts in feature space
shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
# Enumerate combinations of shifts, widths, and heights
box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
# Reshape to get a list of (y, x) and a list of (h, w)
box_centers = np.stack(
[box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
# Convert to corner coordinates (y1, x1, y2, x2)
boxes = np.concatenate([box_centers - 0.5 * box_sizes,
box_centers + 0.5 * box_sizes], axis=1)
return boxes
RPN层

def build_rpn_model(anchor_stride, anchors_per_location, depth):
"""Builds a Keras model of the Region Proposal Network. It wraps the RPN graph so it can be used multiple times with shared weights. anchors_per_location: number of anchors per pixel in the feature map anchor_stride: Controls the density of anchors. Typically 1 (anchors for every pixel in the feature map), or 2 (every other pixel). depth: Depth of the backbone feature map. Returns a Keras Model object. The model outputs, when called, are: rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax) rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities. rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be applied to anchors. """
input_feature_map = KL.Input(shape=[None, None, depth],
name="input_rpn_feature_map")
outputs = rpn_graph(input_feature_map, anchors_per_location, anchor_stride)
return KM.Model([input_feature_map], outputs, name="rpn_model")
def rpn_graph(feature_map, anchors_per_location, anchor_stride):
"""Builds the computation graph of Region Proposal Network. feature_map: backbone features [batch, height, width, depth] anchors_per_location: number of anchors per pixel in the feature map anchor_stride: Controls the density of anchors. Typically 1 (anchors for every pixel in the feature map), or 2 (every other pixel). Returns: rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax) rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities. rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be applied to anchors. """
# TODO: check if stride of 2 causes alignment issues if the feature map
# is not even.
# Shared convolutional base of the RPN 共享卷积
shared = KL.Conv2D(512, (3, 3), padding='same', activation='relu',
strides=anchor_stride,
name='rpn_conv_shared')(feature_map)
# Anchor Score. [batch, height, width, anchors per location * 2].
x = KL.Conv2D(2 * anchors_per_location, (1, 1), padding='valid',
activation='linear', name='rpn_class_raw')(shared)
# Reshape to [batch, anchors, 2]
rpn_class_logits = KL.Lambda(
lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 2]))(x)
# Softmax on last dimension of BG/FG.
rpn_probs = KL.Activation(
"softmax", name="rpn_class_xxx")(rpn_class_logits)
# Bounding box refinement. [batch, H, W, anchors per location * depth]
# where depth is [x, y, log(w), log(h)]
x = KL.Conv2D(anchors_per_location * 4, (1, 1), padding="valid",
activation='linear', name='rpn_bbox_pred')(shared)
# Reshape to [batch, anchors, 4]
rpn_bbox = KL.Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 4]))(x)
return [rpn_class_logits, rpn_probs, rpn_bbox]
ProposalLayer层

DetectionTargetLayer层


class DetectionTargetLayer(KE.Layer):
"""Subsamples proposals and generates target box refinement, class_ids, and masks for each. Inputs: proposals: [batch, N, (y1, x1, y2, x2)] in normalized coordinates. Might be zero padded if there are not enough proposals. gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs. gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates. gt_masks: [batch, height, width, MAX_GT_INSTANCES] of boolean type Returns: Target ROIs and corresponding class IDs, bounding box shifts, and masks. rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]. Integer class IDs. target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw)] target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width] Masks cropped to bbox boundaries and resized to neural network output size. Note: Returned arrays might be zero padded if not enough target ROIs. """
def __init__(self, config, **kwargs):
super(DetectionTargetLayer, self).__init__(**kwargs)
self.config = config
def call(self, inputs):
proposals = inputs[0]
gt_class_ids = inputs[1]
gt_boxes = inputs[2]
gt_masks = inputs[3]
# Slice the batch and run a graph for each slice
# TODO: Rename target_bbox to target_deltas for clarity
names = ["rois", "target_class_ids", "target_bbox", "target_mask"]
outputs = utils.batch_slice(
[proposals, gt_class_ids, gt_boxes, gt_masks],
lambda w, x, y, z: detection_targets_graph(
w, x, y, z, self.config),
self.config.IMAGES_PER_GPU, names=names)
return outputs
def compute_output_shape(self, input_shape):
return [
(None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # rois
(None, self.config.TRAIN_ROIS_PER_IMAGE), # class_ids
(None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # deltas
(None, self.config.TRAIN_ROIS_PER_IMAGE, self.config.MASK_SHAPE[0],
self.config.MASK_SHAPE[1]) # masks
]
def compute_mask(self, inputs, mask=None):
return [None, None, None, None]
ROI Align层


边栏推荐
- pymysql.Error 获取错误码与具体错误信息
- Typescript -- Section 7 enumeration
- Huawei's level 22 experts have worked hard for ten years to complete the advanced practical document of cloud native service grid. 6
- IO playback function of FIO
- window10 phpstudy 安装redis扩展
- stm32F407-------LCD
- 自动化测试的生命周期是什么?
- 三個pwn題
- Easy to use free ppt template
- ctfshow XSS
猜你喜欢

Typescript -- Section 2: variable declaration

stm32F407-------LCD

TypeScript --第三节:接口

Machine learning 6-decision tree

LinkedIn DataHub --- 经验分享

Matlab learning notes (6) upsample function and downsample function of MATLAB

Technology sharing | software development process that you must understand if you want to get started with testing
![[state machine design] Moore, Mealy state machine, three-stage, two-stage and one-stage state machine writing specification](/img/48/e29f34aff7cc437bfb574591d54e3d.png)
[state machine design] Moore, Mealy state machine, three-stage, two-stage and one-stage state machine writing specification

Typescript -- Section 1: basic types

Mobile heterogeneous computing technology - GPU OpenCL programming (basic)
随机推荐
How to make two objects or arrays equal
What pitfalls should be avoided in the job interview for the operation post in 2022?
window10 phpstudy 安装redis扩展
Analysis of CSRF Cross Site Request Forgery vulnerability
一条update语句到底加了多少锁?带你深入理解底层原理
stm32F407-------GPIO输入实验
stm32F407-------串行(串口)通信
PHP利用CURL实现登录网站后下载Excel文件
Reading notes of English grammar new thinking Basic Edition 2 (I)
入行数字IC验证后会做些什么?
Stm32f407 ------ running lamp and buzzer
stm32F407-------LCD
PHP函数file_get_contents与操作系统的内存映射
百度知道爬虫,根据问题id,线索id,评论id获取评论下面的对话
[C Prime plus chapitre II Questions de programmation après la Classe]
Stm32f407 ------- IO pin multiplexing mapping
【软件分析】软件分析、设计与建模迭代式详解
Save data in Excel: use openpyxl to create multiple tables and set excel row limit
Technology sharing | software development process that you must understand if you want to get started with testing
ctfshow XSS