Source code for faster_rcnn.rpn_msr.anchor_target_layer

import torch.nn as nn
import numpy as np
import numpy.random as npr
from .generate_anchors import generate_anchors
from ..utils.cython_bbox import bbox_overlaps
from ..fastrcnn.bbox_transform import bbox_transform
from ..config import cfg
import torch
from ..network import np_to_tensor
import logging

logger = logging.getLogger("root")
logger.setLevel(logging.DEBUG)


class AnchorTargerLayer(nn.Module):
    """Calculate targets for the RPN network.

    Attributes
    ----------
    is_cuda : bool
        Using the GPU or not.
    """
    def __init__(self, feat_stride, anchor_scales, is_cuda=True):
        """Build the base anchors for the given stride and scales.

        Parameters
        ----------
        feat_stride : :class:`numpy.array`
            The ratio between the original image size and the convolutional
            feature maps; used to project a point on the feature maps back
            into the original image when calculating anchors.
            Example: np.array([16.,])
        anchor_scales : :class:`numpy.array`
            Scales used to generate the base anchors.
        is_cuda : bool, optional
            Using the GPU or not.
        """
        super(AnchorTargerLayer, self).__init__()
        self._feat_stride = feat_stride
        self._anchor_scales = anchor_scales
        self._anchors = generate_anchors(scales=np.array(self._anchor_scales))
        self._num_anchors = self._anchors.shape[0]
        self.is_cuda = is_cuda
        # allow boxes to sit over the edge by a small amount
        self._allow_border = 0
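    # Construction sketch (hypothetical values): with anchor_scales=[8, 16, 32]
    # and the three default aspect ratios used by generate_anchors, the base
    # anchors self._anchors have shape (9, 4) and self._num_anchors == 9.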
    def forward(self, rpn_cls_score, gt_boxes, batch_boxes_index, im_info):
        """Generate all anchors, filter out the anchors that lie outside the
        original image, then assign labels based on overlap values and
        calculate the bbox regression targets.

        Parameters
        ----------
        rpn_cls_score : :class:`torch.Tensor`
            The probability of each anchor containing an object center.
        gt_boxes : :class:`numpy.array`
            All ground truth boxes across all the images in the batch.
        batch_boxes_index : :class:`numpy.array`
            Batch index each box belongs to.
        im_info : :class:`torch.Tensor([[im_height, im_width]])`
            Original image size.

        Returns
        -------
        (:class:`torch.Tensor`, :class:`torch.Tensor`, :class:`torch.Tensor`, :class:`torch.Tensor`)
            labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
        """
        # gt_boxes = gt_boxes.numpy()
        # im_info = im_info.numpy()
        # batch_boxes_index = batch_boxes_index.numpy()
        batch_boxes = gt_boxes[:, :4]
        feature_height, feature_width = rpn_cls_score.shape[2], rpn_cls_score.shape[3]
        batch_size = rpn_cls_score.shape[0]
        im_height, im_width = im_info[0][0], im_info[0][1]
        A = self._num_anchors

        # 1. Generate proposals from bbox deltas and shifted anchors
        all_anchors = self._create_anchors(feature_height, feature_width)
        total_anchors = all_anchors.shape[0]
        # only keep anchors inside the image
        inside_anchors, inside_anchor_indexes = self._filter_outside_anchors(
            all_anchors, im_height, im_width)

        # 2. Calculate overlaps and assign the corresponding labels
        bbox_inside_weights = np.zeros(
            (batch_size, inside_anchor_indexes.shape[0], 4), dtype=np.float32)
        bbox_outside_weights = np.zeros(
            (batch_size, inside_anchor_indexes.shape[0], 4), dtype=np.float32)
        labels, bbox_targets = self.calculate_target(
            inside_anchors, batch_size, inside_anchor_indexes,
            batch_boxes, batch_boxes_index)

        # 3. Calculate bbox_inside_weights and bbox_outside_weights
        bbox_inside_weights[labels == 1] = cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        sum_fg = np.sum(labels == 1, axis=1)
        sum_bg = np.sum(labels == 0, axis=1)
        for i in range(batch_size):
            current_batch_sum_fg = sum_fg[i]
            current_batch_sum_bg = sum_bg[i]
            current_batch_fg_index = np.where(labels[i] == 1)[0]
            current_batch_bg_index = np.where(labels[i] == 0)[0]
            # subsample positive labels if we have too many
            if current_batch_sum_fg > num_fg:
                disable_inds = npr.choice(
                    current_batch_fg_index,
                    size=(current_batch_sum_fg - num_fg),
                    replace=False)
                labels[i][disable_inds] = -1
            # subsample negative labels if we have too many
            num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels[i] == 1)
            if current_batch_sum_bg > num_bg:
                disable_inds = npr.choice(
                    current_batch_bg_index,
                    size=(current_batch_sum_bg - num_bg),
                    replace=False)
                labels[i][disable_inds] = -1

            if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
                num_examples = np.sum(labels[i] >= 0)
                positive_weights = 1.0 / num_examples
                negative_weights = 1.0 / num_examples
            else:
                assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                        (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
                positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                                    (np.sum(labels == 1) + 1))
                negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                                    (np.sum(labels == 0) + 1))
            bbox_outside_weights[labels == 1] = np.array([positive_weights] * 4)
            bbox_outside_weights[labels == 0] = np.array([negative_weights] * 4)

        # map the sampled targets back up to the full set of anchors
        labels = self._unmap(labels, total_anchors, inside_anchor_indexes,
                             batch_size, fill=-1)
        bbox_targets = self._unmap(bbox_targets, total_anchors,
                                   inside_anchor_indexes, batch_size, fill=0)
        bbox_inside_weights = self._unmap(
            bbox_inside_weights, total_anchors, inside_anchor_indexes,
            batch_size, fill=0)
        bbox_outside_weights = self._unmap(
            bbox_outside_weights, total_anchors, inside_anchor_indexes,
            batch_size, fill=0)

        # reshape to the (N, C, H, W) layout of the network outputs
        labels = labels.reshape((batch_size, feature_height, feature_width, A))
        labels = labels.transpose((0, 3, 1, 2))
        labels = labels.reshape(
            (batch_size, 1, A * feature_height, feature_width))
        bbox_targets = bbox_targets.reshape(
            (batch_size, feature_height, feature_width, A * 4)).transpose(
                (0, 3, 1, 2))
        bbox_inside_weights = bbox_inside_weights.reshape(
            (batch_size, feature_height, feature_width, A * 4)).transpose(
                (0, 3, 1, 2))
        bbox_outside_weights = bbox_outside_weights.reshape(
            (batch_size, feature_height, feature_width, A * 4)).transpose(
                (0, 3, 1, 2))

        return (np_to_tensor(labels, self.is_cuda, dtype=torch.LongTensor),
                np_to_tensor(bbox_targets, self.is_cuda),
                np_to_tensor(bbox_inside_weights, self.is_cuda),
                np_to_tensor(bbox_outside_weights, self.is_cuda))
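    # Output layout sketch: with A anchors per location and an H x W feature
    # map, `labels` is returned as (N, 1, A*H, W) and the three bbox tensors
    # as (N, A*4, H, W), matching the (N, C, H, W) layout of the RPN head.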
    def backward(self, top, propagate_down, bottom):
        pass

    def reshape(self, bottom, top):
        pass
    def _create_anchors(self, feature_height, feature_width):
        """Create all anchors for the given feature map height and width.

        Parameters
        ----------
        feature_height : int
            Feature map height.
        feature_width : int
            Feature map width.

        Returns
        -------
        :class:`numpy.array`
            All anchors, shape (K * A, 4).
        """
        shift_x = np.arange(0, feature_width) * self._feat_stride
        shift_y = np.arange(0, feature_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # generate shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        # move to specific gpu.
        # self._anchors = self._anchors.type_as(gt_boxes)
        # add the A base anchors to each of the K shifts to get all anchors
        all_anchors = (self._anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        return all_anchors
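    # Worked example (a sketch): with feat_stride == 16 and a 2x2 feature map,
    # shift_x == shift_y == [0, 16], so `shifts` holds K == 4 rows of
    # [x, y, x, y] offsets. Each of the A base anchors is replicated at every
    # shift, giving (K * A, 4) anchors in original-image coordinates.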
    def _filter_outside_anchors(self, all_anchors, im_height, im_width):
        """Remove anchors that lie outside the image from the generated anchors.

        Parameters
        ----------
        all_anchors : :class:`numpy.array`
            All generated anchors.
        im_height : int
            Original image height.
        im_width : int
            Original image width.

        Returns
        -------
        tuple(inside_anchors, index_of_inside_anchors)
            All inside anchors and their indexes.
        """
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allow_border) &
            (all_anchors[:, 1] >= -self._allow_border) &
            (all_anchors[:, 2] < im_width + self._allow_border) &   # width
            (all_anchors[:, 3] < im_height + self._allow_border)    # height
        )[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        return anchors, inds_inside
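    # Filtering sketch (hypothetical anchors): with _allow_border == 0 and an
    # 800-pixel-wide image, an anchor such as [-84, -40, 99, 55] is dropped
    # because x1 < 0, while [16, 16, 200, 200] is kept.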
    def _unmap(self, data, count, inds, batch_size, fill=0):
        """Unmap a subset of items (data) back to the original set of
        size `count`, filling the missing positions with `fill`."""
        if len(data.shape) == 2:
            ret = np.empty((batch_size, count), dtype=np.float32)
            ret.fill(fill)
            ret[:, inds] = data
        else:
            ret = np.empty(
                (batch_size, count, data.shape[2]), dtype=np.float32)
            ret.fill(fill)
            ret[:, inds, :] = data
        return ret
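    # _unmap sketch: with count == 5, inds == [1, 3], fill == -1 and
    # data == [[0.2, 0.7]] (batch_size == 1), the result is
    # [[-1., 0.2, -1., 0.7, -1.]]: values kept for the inside anchors return
    # to their original positions, every other anchor gets the fill value.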
    def calculate_target(self, inside_anchors, batch_size,
                         inside_anchor_indexes, batch_boxes,
                         batch_boxes_index):
        """Calculate the labels and bbox regression targets.

        Notes
        -----
        | Create an empty label array:
        | - `labels`:

        >>> labels.shape
        (batch_size, A)

        | For each batch item, there are `A` anchors and `G` batch boxes:
        |
        | `current_batch_overlaps`: overlaps between anchors and boxes

        >>> current_batch_overlaps.shape
        (A, G)

        | `argmax_overlaps`: index of the box that has the largest overlap
          w.r.t. each anchor.

        >>> argmax_overlaps.shape
        (A,)

        | `max_overlaps`: the largest overlap value between the boxes and
          each anchor.

        >>> max_overlaps.shape
        (A,)

        | Set the `label` values for the current batch item:

        >>> labels[i, max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        >>> labels[i, max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        Parameters
        ----------
        inside_anchors : :class:`numpy.array`
            All anchors that lie inside the original images.
            Shape: [number_of_anchors, 4]
        batch_size : int
            Current batch size.
        inside_anchor_indexes : :class:`numpy.array`
            Indexes of the inside anchors. Shape: [number_of_anchors, 1]
        batch_boxes : :class:`numpy.array`
            All ground truth boxes across all the images in the batch,
            for example:

            >>> batch_boxes
            [[  48.57142857  465.71428571  537.14285714 1774.28571429]
             [ 220.         1065.71428571  382.85714286 1771.42857143]
             [ 326.76056338  315.49295775  394.36619718  580.28169014]
             [  76.05633803  290.14084507  242.25352113  788.73239437]
             [  11.26760563    8.45070423  585.91549296 1769.01408451]
             [ 178.125       221.875       287.5         975.        ]
             [ 321.875       334.375       434.375       984.375     ]]

        batch_boxes_index : list[int]
            | Batch index each box belongs to.
            | Example: there are 3 images in the current batch and 6 boxes
              across those 3 images. From `batch_boxes_index` we know:
            | the first 3 boxes belong to the first image,
            | the next 2 boxes belong to the second image,
            | and the last box belongs to the last image.

            >>> batch_boxes_index
            [0, 0, 0, 1, 1, 2]

        Returns
        -------
        (:class:`numpy.array((batch_size, A))`, :class:`numpy.array((batch_size, A, 4))`)
            The calculated labels and bbox_targets.
        """
        labels = np.empty(
            (batch_size, inside_anchor_indexes.shape[0]), dtype=np.float32)
        labels.fill(-1)
        bbox_targets = np.zeros(
            (batch_size, inside_anchor_indexes.shape[0], 4), dtype=np.float32)
        overlaps = bbox_overlaps(inside_anchors.astype(np.float64),
                                 batch_boxes.astype(np.float64))
        for i in range(batch_size):
            current_batch_overlaps = overlaps[:, batch_boxes_index == i]
            current_batch_boxes = batch_boxes[batch_boxes_index == i]
            argmax_overlaps = current_batch_overlaps.argmax(axis=1)  # (A,)
            max_overlaps = current_batch_overlaps[
                np.arange(inside_anchor_indexes.shape[0]), argmax_overlaps]
            gt_argmax_overlaps = current_batch_overlaps.argmax(axis=0)  # (G,)
            gt_max_overlaps = current_batch_overlaps[
                gt_argmax_overlaps,
                np.arange(current_batch_overlaps.shape[1])]
            gt_argmax_overlaps = np.where(
                current_batch_overlaps == gt_max_overlaps)[0]

            if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
                # assign bg labels first so that positive labels can clobber them
                labels[i, max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

            # fg label: for each gt, the anchor with the highest overlap
            labels[i, gt_argmax_overlaps] = 1

            # fg label: above threshold IOU
            labels[i, max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

            if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
                # assign bg labels last so that negative labels can clobber positives
                labels[i, max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

            current_batch_bbox_targets = bbox_transform(
                inside_anchors.astype(np.float64),
                current_batch_boxes[argmax_overlaps, :]).astype(
                    np.float32, copy=False)
            bbox_targets[i, :] = current_batch_bbox_targets
        return labels, bbox_targets
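
# A minimal usage sketch with hypothetical shapes and values: it assumes the
# cython bbox_overlaps extension is built and that cfg.TRAIN provides the
# RPN_* settings referenced above. `_demo` is illustrative only and is not
# part of the training pipeline.
def _demo():
    layer = AnchorTargerLayer(feat_stride=16, anchor_scales=[8, 16, 32],
                              is_cuda=False)
    A = layer._num_anchors
    # Fake inputs: a 1-image batch, a 37x50 feature map and two gt boxes
    # inside a 600x800 image. im_info is passed as a plain array here; the
    # layer only indexes im_info[0].
    rpn_cls_score = torch.zeros(1, 2 * A, 37, 50)        # (N, 2*A, H, W)
    gt_boxes = np.array([[48., 65., 537., 474., 1.],
                         [220., 165., 382., 371., 1.]])  # [x1, y1, x2, y2, cls]
    batch_boxes_index = np.array([0, 0])                 # both boxes in image 0
    im_info = np.array([[600., 800.]])                   # [im_height, im_width]
    labels, bbox_targets, inside_w, outside_w = layer(
        rpn_cls_score, gt_boxes, batch_boxes_index, im_info)
    # labels: (1, 1, A*37, 50); the three bbox tensors: (1, A*4, 37, 50)
    return labels, bbox_targets, inside_w, outside_w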