import math
import torch
import torch.nn.functional as F
import torch.distributed as dist
from torchvision.ops.boxes import box_area


# ------------------------- For loss -------------------------
## FocalLoss
def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2):
    """
    Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.
    Args:
        inputs: A float tensor of arbitrary shape.
                The predictions (logits) for each example.
        targets: A float tensor with the same shape as inputs. Stores the binary
                 classification label for each element in inputs
                 (0 for the negative class and 1 for the positive class).
        num_boxes: Normalizer for the loss, typically the number of ground-truth
                   boxes in the batch.
        alpha: (optional) Weighting factor in range (0, 1) to balance
               positive vs. negative examples; a negative value disables the weighting.
        gamma: Exponent of the modulating factor (1 - p_t) used to
               balance easy vs. hard examples.
    Returns:
        Loss tensor
    """
    prob = inputs.sigmoid()
    ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
    p_t = prob * targets + (1 - prob) * (1 - targets)
    loss = ce_loss * ((1 - p_t) ** gamma)

    if alpha >= 0:
        alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
        loss = alpha_t * loss

    return loss.mean(1).sum() / num_boxes
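

# Illustrative usage sketch (hypothetical helper, not part of the original module):
# in a DETR-style pipeline `inputs` is a [num_queries, num_classes] logit tensor,
# `targets` the matched one-hot labels, and `num_boxes` the ground-truth box count
# used to normalize the loss.
def _example_sigmoid_focal_loss():
    logits = torch.randn(100, 80)   # 100 queries, 80 classes
    labels = torch.zeros(100, 80)
    labels[0, 3] = 1.0              # one query matched to class 3
    return sigmoid_focal_loss(logits, labels, num_boxes=1)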


# ------------------------- For box -------------------------
def box_cxcywh_to_xyxy(x):
    x_c, y_c, w, h = x.unbind(-1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=-1)


def box_xyxy_to_cxcywh(x):
    x0, y0, x1, y1 = x.unbind(-1)
    b = [(x0 + x1) / 2, (y0 + y1) / 2,
         (x1 - x0), (y1 - y0)]
    return torch.stack(b, dim=-1)
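

# Illustrative round-trip check (hypothetical helper, not part of the original
# module): converting cxcywh -> xyxy -> cxcywh should recover the input boxes.
def _example_box_roundtrip():
    boxes_cxcywh = torch.tensor([[0.5, 0.5, 0.2, 0.4]])
    boxes_xyxy = box_cxcywh_to_xyxy(boxes_cxcywh)   # approx. [[0.4, 0.3, 0.6, 0.7]]
    return torch.allclose(box_xyxy_to_cxcywh(boxes_xyxy), boxes_cxcywh)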


def bbox2delta(proposals, gt, means=(0., 0., 0., 0.), stds=(1., 1., 1., 1.)):
    # hack for the matcher: broadcast to a pairwise [N, M, 4] shape when the
    # two inputs have different sizes
    if proposals.size() != gt.size():
        proposals = proposals[:, None]
        gt = gt[None]

    proposals = proposals.float()
    gt = gt.float()
    px, py, pw, ph = proposals.unbind(-1)
    gx, gy, gw, gh = gt.unbind(-1)

    # the +0.1 term keeps the division and log stable for tiny proposals
    dx = (gx - px) / (pw + 0.1)
    dy = (gy - py) / (ph + 0.1)
    dw = torch.log(gw / (pw + 0.1))
    dh = torch.log(gh / (ph + 0.1))
    deltas = torch.stack([dx, dy, dw, dh], dim=-1)

    means = deltas.new_tensor(means).unsqueeze(0)
    stds = deltas.new_tensor(stds).unsqueeze(0)
    deltas = deltas.sub_(means).div_(stds)

    return deltas
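

# Illustrative sketch (hypothetical helper, not part of the original module):
# when the two inputs differ in size, the broadcasting branch above produces a
# pairwise [N, M, 4] delta tensor, which is what the matcher consumes.
def _example_bbox2delta_pairwise():
    proposals = torch.rand(5, 4)    # N = 5 proposals as (x, y, w, h)
    gt = torch.rand(3, 4)           # M = 3 ground-truth boxes as (x, y, w, h)
    return bbox2delta(proposals, gt).shape  # torch.Size([5, 3, 4])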


def box_iou(boxes1, boxes2):
    """Pairwise IoU and union area for two sets of [x0, y0, x1, y1] boxes."""
    area1 = box_area(boxes1)
    area2 = box_area(boxes2)

    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    wh = (rb - lt).clamp(min=0)  # [N,M,2]
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]

    union = area1[:, None] + area2 - inter

    iou = inter / union
    return iou, union


def generalized_box_iou(boxes1, boxes2):
    """
    Generalized IoU from https://giou.stanford.edu/

    The boxes should be in [x0, y0, x1, y1] format.

    Returns a [N, M] pairwise matrix, where N = len(boxes1)
    and M = len(boxes2).
    """
    # degenerate boxes give inf / nan results,
    # so do an early check
    assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
    assert (boxes2[:, 2:] >= boxes2[:, :2]).all()
    iou, union = box_iou(boxes1, boxes2)

    lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
    rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])

    wh = (rb - lt).clamp(min=0)  # [N,M,2]
    area = wh[:, :, 0] * wh[:, :, 1]  # area of the smallest enclosing box

    return iou - (area - union) / area
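

# Illustrative sketch (hypothetical helper, not part of the original module): for
# two disjoint boxes the IoU term is 0 and the enclosing-area penalty drives the
# GIoU negative, which gives the loss a useful signal for non-overlapping pairs.
def _example_generalized_box_iou():
    boxes1 = torch.tensor([[0.0, 0.0, 1.0, 1.0]])
    boxes2 = torch.tensor([[2.0, 0.0, 3.0, 1.0]])
    return generalized_box_iou(boxes1, boxes2)  # approx. -1/3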


# ------------------------- For distributed -------------------------
def is_dist_avail_and_initialized():
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True


def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()
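

# Illustrative usage sketch (hypothetical helper, not part of the original module):
# a common pattern is to sum the ground-truth box count across processes and divide
# by the world size, so the losses keep the same scale as in single-GPU training.
def _example_normalize_num_boxes(num_boxes):
    num_boxes = torch.as_tensor([float(num_boxes)])
    if is_dist_avail_and_initialized():
        dist.all_reduce(num_boxes)              # sum over all processes
    return torch.clamp(num_boxes / get_world_size(), min=1).item()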