import math

import torch
import torch.nn.functional as F
import torch.distributed as dist
from torchvision.ops.boxes import box_area


# ------------------------- For loss -------------------------
## Focal loss
def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2):
    """
    Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.
    Args:
        inputs: A float tensor of arbitrary shape.
                The predictions (logits) for each example.
        targets: A float tensor with the same shape as inputs. Stores the binary
                 classification label for each element in inputs
                 (0 for the negative class and 1 for the positive class).
        num_boxes: Normalization factor, typically the number of target boxes.
        alpha: (optional) Weighting factor in range (0, 1) to balance
               positive vs negative examples. Default = 0.25; a negative value
               disables the weighting.
        gamma: Exponent of the modulating factor (1 - p_t) to
               balance easy vs hard examples.
    Returns:
        Loss tensor
    """
    prob = inputs.sigmoid()
    ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
    p_t = prob * targets + (1 - prob) * (1 - targets)
    loss = ce_loss * ((1 - p_t) ** gamma)

    if alpha >= 0:
        alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
        loss = alpha_t * loss

    return loss.mean(1).sum() / num_boxes
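
# Illustrative sketch (added for clarity, not part of the original module): a minimal
# usage example for sigmoid_focal_loss. The [batch, queries, classes] shape and the
# num_boxes value below are assumptions chosen purely for demonstration.
def _demo_sigmoid_focal_loss():
    logits = torch.randn(2, 100, 80)      # per-query class logits
    targets = torch.zeros_like(logits)    # one-hot targets; all negatives by default
    targets[:, :10, 3] = 1.0              # pretend the first 10 queries match class 3
    return sigmoid_focal_loss(logits, targets, num_boxes=10)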

## Varifocal loss
def varifocal_loss_with_logits(pred_logits,
                               gt_score,
                               label,
                               normalizer=1.0,
                               alpha=0.75,
                               gamma=2.0):
    """Varifocal loss from VarifocalNet (https://arxiv.org/abs/2008.13367). Positives are
    weighted by their IoU-aware target score (gt_score); negatives are down-weighted by
    a focal-style factor on the predicted score."""
    pred_score = pred_logits.sigmoid()
    weight = alpha * pred_score.pow(gamma) * (1 - label) + gt_score * label
    loss = F.binary_cross_entropy_with_logits(pred_logits, gt_score, reduction='none')
    loss = loss * weight
    return loss.mean(1).sum() / normalizer
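
# Illustrative sketch (assumed shapes): the varifocal loss takes an IoU-aware soft target
# (gt_score) together with a binary positive mask (label) of the same shape as the logits.
def _demo_varifocal_loss():
    logits = torch.randn(2, 100, 80)
    label = torch.zeros_like(logits)
    label[:, :10, 3] = 1.0                       # positive queries / classes
    gt_score = label * torch.rand_like(logits)   # e.g. IoU of the matched box as soft target
    return varifocal_loss_with_logits(logits, gt_score, label, normalizer=10.0)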

## Inverse sigmoid
def inverse_sigmoid(x, eps=1e-5):
    """Numerically stable inverse of the sigmoid (logit) function."""
    x = x.clamp(min=0, max=1)
    x1 = x.clamp(min=eps)
    x2 = (1 - x).clamp(min=eps)
    return torch.log(x1 / x2)
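
# Illustrative sketch: inverse_sigmoid is the (clamped) logit function, so it round-trips
# with torch.sigmoid away from the clamping boundaries.
def _demo_inverse_sigmoid():
    p = torch.tensor([0.1, 0.5, 0.9])
    logits = inverse_sigmoid(p)
    return torch.allclose(torch.sigmoid(logits), p, atol=1e-4)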

## GIoU loss
class GIoULoss(object):
    """Modified GIoULoss from PaddlePaddle."""
    def __init__(self, eps=1e-10, reduction='none'):
        self.eps = eps
        self.reduction = reduction
        assert reduction in ('none', 'mean', 'sum')

    def bbox_overlap(self, box1, box2, eps=1e-10):
        """Calculate the IoU of box1 and box2.
        Args:
            box1 (list[Tensor]): [x1, y1, x2, y2], each with shape (..., 1)
            box2 (list[Tensor]): [x1, y1, x2, y2], each with shape (..., 1)
            eps (float): epsilon to avoid division by zero
        Return:
            iou (Tensor): iou of box1 and box2
            overlap (Tensor): intersection area of box1 and box2
            union (Tensor): union area of box1 and box2
        """
        x1, y1, x2, y2 = box1
        x1g, y1g, x2g, y2g = box2

        xkis1 = torch.max(x1, x1g)
        ykis1 = torch.max(y1, y1g)
        xkis2 = torch.min(x2, x2g)
        ykis2 = torch.min(y2, y2g)

        w_inter = (xkis2 - xkis1).clip(0)
        h_inter = (ykis2 - ykis1).clip(0)
        overlap = w_inter * h_inter

        area1 = (x2 - x1) * (y2 - y1)
        area2 = (x2g - x1g) * (y2g - y1g)
        union = area1 + area2 - overlap + eps
        iou = overlap / union

        return iou, overlap, union

    def __call__(self, pbox, gbox):
        x1, y1, x2, y2 = torch.chunk(pbox, 4, dim=-1)
        x1g, y1g, x2g, y2g = torch.chunk(gbox, 4, dim=-1)
        box1 = [x1, y1, x2, y2]
        box2 = [x1g, y1g, x2g, y2g]
        iou, _, union = self.bbox_overlap(box1, box2, self.eps)

        # smallest enclosing box
        xc1 = torch.min(x1, x1g)
        yc1 = torch.min(y1, y1g)
        xc2 = torch.max(x2, x2g)
        yc2 = torch.max(y2, y2g)

        area_c = (xc2 - xc1) * (yc2 - yc1) + self.eps
        miou = iou - ((area_c - union) / area_c)
        giou = 1 - miou

        if self.reduction == 'none':
            loss = giou
        elif self.reduction == 'sum':
            loss = giou.sum()
        elif self.reduction == 'mean':
            loss = giou.mean()
        return loss
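
# Illustrative sketch (assumed inputs): GIoULoss expects predicted and target boxes in
# (x1, y1, x2, y2) format with a trailing dimension of 4; reduction is chosen at construction.
def _demo_giou_loss():
    criterion = GIoULoss(reduction='mean')
    pred = torch.tensor([[0.0, 0.0, 2.0, 2.0],
                         [1.0, 1.0, 3.0, 3.0]])
    target = torch.tensor([[0.0, 0.0, 2.0, 2.0],
                           [0.0, 0.0, 2.0, 2.0]])
    return criterion(pred, target)   # ~0 for a perfect match, larger for worse overlap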

# ------------------------- For box -------------------------
def box_cxcywh_to_xyxy(x):
    """Convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2) format."""
    x_c, y_c, w, h = x.unbind(-1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=-1)


def box_xyxy_to_cxcywh(x):
    """Convert boxes from (x1, y1, x2, y2) to (cx, cy, w, h) format."""
    x0, y0, x1, y1 = x.unbind(-1)
    b = [(x0 + x1) / 2, (y0 + y1) / 2,
         (x1 - x0), (y1 - y0)]
    return torch.stack(b, dim=-1)
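
# Illustrative sketch: the two conversion helpers are inverses of each other, so a
# round trip should return the original boxes (up to floating-point error).
def _demo_box_conversion():
    boxes_xyxy = torch.tensor([[10.0, 10.0, 50.0, 30.0]])
    boxes_cxcywh = box_xyxy_to_cxcywh(boxes_xyxy)   # -> [[30., 20., 40., 20.]]
    return torch.allclose(box_cxcywh_to_xyxy(boxes_cxcywh), boxes_xyxy)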

def box_iou(boxes1, boxes2):
    area1 = box_area(boxes1)
    area2 = box_area(boxes2)

    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    wh = (rb - lt).clamp(min=0)  # [N,M,2]
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]

    union = area1[:, None] + area2 - inter
    iou = inter / union
    return iou, union

def generalized_box_iou(boxes1, boxes2):
    """
    Generalized IoU from https://giou.stanford.edu/
    The boxes should be in [x0, y0, x1, y1] format.
    Returns a [N, M] pairwise matrix, where N = len(boxes1)
    and M = len(boxes2).
    """
    # degenerate boxes give inf / nan results, so do an early check
    assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
    assert (boxes2[:, 2:] >= boxes2[:, :2]).all()
    iou, union = box_iou(boxes1, boxes2)

    lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
    rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])

    wh = (rb - lt).clamp(min=0)  # [N,M,2]
    area = wh[:, :, 0] * wh[:, :, 1]

    return iou - (area - union) / area
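
# Illustrative sketch (assumed normalized coordinates): generalized_box_iou returns the
# full N x M pairwise matrix, typically used as a matching or regression cost term.
def _demo_generalized_box_iou():
    boxes1 = torch.tensor([[0.0, 0.0, 1.0, 1.0],
                           [0.5, 0.5, 1.0, 1.0]])  # N = 2
    boxes2 = torch.tensor([[0.0, 0.0, 0.5, 0.5]])  # M = 1
    return generalized_box_iou(boxes1, boxes2)      # shape [2, 1], values in [-1, 1]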

def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
    """Modified from PaddlePaddle.
    Args:
        box1 (Tensor): predicted boxes in [x1, y1, x2, y2] format, shape [..., 4]
        box2 (Tensor): target boxes in [x1, y1, x2, y2] format, shape [..., 4]
        giou (bool): whether to use GIoU, default False
        diou (bool): whether to use DIoU, default False
        ciou (bool): whether to use CIoU, default False
        eps (float): epsilon to avoid division by zero
    Return:
        iou (Tensor): IoU of box1 and box2, shape [..., 1]
    """
    px1, py1, px2, py2 = torch.chunk(box1, 4, -1)
    gx1, gy1, gx2, gy2 = torch.chunk(box2, 4, -1)
    x1 = torch.max(px1, gx1)
    y1 = torch.max(py1, gy1)
    x2 = torch.min(px2, gx2)
    y2 = torch.min(py2, gy2)

    overlap = ((x2 - x1).clamp(0)) * ((y2 - y1).clamp(0))

    area1 = (px2 - px1) * (py2 - py1)
    area1 = area1.clamp(0)

    area2 = (gx2 - gx1) * (gy2 - gy1)
    area2 = area2.clamp(0)

    union = area1 + area2 - overlap + eps
    iou = overlap / union

    if giou or ciou or diou:
        # convex (smallest enclosing box) width and height
        cw = torch.max(px2, gx2) - torch.min(px1, gx1)
        ch = torch.max(py2, gy2) - torch.min(py1, gy1)
        if giou:
            c_area = cw * ch + eps
            return iou - (c_area - union) / c_area
        else:
            # convex diagonal squared
            c2 = cw**2 + ch**2 + eps
            # squared distance between box centers
            rho2 = ((px1 + px2 - gx1 - gx2)**2 + (py1 + py2 - gy1 - gy2)**2) / 4
            if diou:
                return iou - rho2 / c2
            else:
                # CIoU: add an aspect-ratio consistency term
                w1, h1 = px2 - px1, py2 - py1 + eps
                w2, h2 = gx2 - gx1, gy2 - gy1 + eps
                delta = torch.atan(w1 / h1) - torch.atan(w2 / h2)
                v = (4 / math.pi**2) * torch.pow(delta, 2)
                with torch.no_grad():
                    # keep the trade-off coefficient out of the gradient computation
                    alpha = v / (1 + eps - iou + v)
                return iou - (rho2 / c2 + v * alpha)
    else:
        return iou
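
# Illustrative sketch (assumed shapes): bbox_iou operates element-wise on paired boxes
# with a trailing dimension of 4, and the flags select the IoU variant (at most one of
# giou / diou / ciou should be set).
def _demo_bbox_iou():
    pred = torch.tensor([[[0.0, 0.0, 2.0, 2.0]]])   # shape [1, 1, 4]
    gt = torch.tensor([[[1.0, 1.0, 3.0, 3.0]]])
    plain = bbox_iou(pred, gt)              # plain IoU
    ciou = bbox_iou(pred, gt, ciou=True)    # CIoU adds center-distance and aspect penalties
    return plain, ciou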

# ------------------------- For distributed -------------------------
def is_dist_avail_and_initialized():
    if not dist.is_available():
        return False
    if not dist.is_initialized():
        return False
    return True


def get_world_size():
    if not is_dist_avail_and_initialized():
        return 1
    return dist.get_world_size()
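
# Illustrative sketch (a common DETR-style pattern, assumed here rather than taken from
# this module): use the helpers above to average a normalization factor across processes
# so the loss scale is consistent with and without torch.distributed.
def _demo_sync_num_boxes(num_boxes: float):
    num_boxes_t = torch.as_tensor([num_boxes], dtype=torch.float)
    if is_dist_avail_and_initialized():
        dist.all_reduce(num_boxes_t)        # sum over all processes
    return torch.clamp(num_boxes_t / get_world_size(), min=1).item()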