1 year ago · 03540ca7ae
--- a/yolo/config/__init__.py
+++ b/yolo/config/__init__.py
@@ -9,6 +9,9 @@ from .yolov6_config  import build_yolov6_config
 
															 from .yolov7_config  import build_yolov7_config
														
 
															 from .yolov8_config  import build_yolov8_config
														
 
															 from .yolov9_config  import build_yolov9_config
														
 
															+
														
 
															+from .yolof_config   import build_yolof_config
														
 
															+from .fcos_config    import build_fcos_config
														
 
															 from .rtdetr_config  import build_rtdetr_config
														
@@ -38,6 +41,10 @@ def build_config(args):
 
															         cfg = build_yolov9_config(args)
														
 
															     # ----------- RT-DETR -----------
														
 
															+    elif 'yolof' in args.model:
														
 
															+        cfg = build_yolof_config(args)
														
 
															+    elif 'fcos' in args.model:
														
 
															+        cfg = build_fcos_config(args)
														
 
															     elif 'rtdetr' in args.model:
														
 
															         cfg = build_rtdetr_config(args)
														
--- a/yolo/config/fcos_config.py
+++ b/yolo/config/fcos_config.py
@@ -0,0 +1,107 @@
 
															+# Fcos Config
														
 
															+
														
 
															+
														
 
															+def build_fcos_config(args):
														
 
															+    if args.model == 'fcos_r18':
														
 
															+        return FcosR18Config()
														
 
															+    else:
														
 
															+        raise NotImplementedError("No config for model: {}".format(args.model))
														
 
															+    
														
 
															+# Fcos-Base config
														
 
															+class FcosBaseConfig(object):
														
 
															+    def __init__(self) -> None:
														
 
															+        # ---------------- Model config ----------------
														
 
															+        self.out_stride = [8, 16, 32, 64]
														
 
															+        self.max_stride = 64
														
 
															+        ## Backbone
														
 
															+        self.backbone = 'resnet50'
														
 
															+        self.use_pretrained = True
														
 
															+        ## Head
														
 
															+        self.head_dim  = 256
														
 
															+        self.num_cls_head = 4
														
 
															+        self.num_reg_head = 4
														
 
															+
														
 
															+        # ---------------- Post-process config ----------------
														
 
															+        ## Post process
														
 
															+        self.val_topk = 1000
														
 
															+        self.val_conf_thresh = 0.05
														
 
															+        self.val_nms_thresh  = 0.6
														
 
															+        self.test_topk = 100
														
 
															+        self.test_conf_thresh = 0.3
														
 
															+        self.test_nms_thresh  = 0.45
														
 
															+
														
 
															+        # ---------------- Assignment config ----------------
														
 
															+        ## Matcher
														
 
															+        self.center_sampling_radius = 1.5
														
 
															+        self.object_sizes_of_interest = [[-1, 64], [64, 128], [128, 256], [256, 512], [512, float('inf')]]
														
 
															+
														
 
															+        ## Loss weight
														
 
															+        self.focal_loss_alpha = 0.25
														
 
															+        self.focal_loss_gamma = 2.0
														
 
															+        self.loss_cls = 1.0
														
 
															+        self.loss_reg = 1.0
														
 
															+        self.loss_ctn = 1.0
														
 
															+
														
 
															+        # ---------------- ModelEMA config ----------------
														
 
															+        self.use_ema   = True
														
 
															+        self.ema_decay = 0.9998
														
 
															+        self.ema_tau   = 2000
														
 
															+
														
 
															+        # ---------------- Optimizer config ----------------
														
 
															+        self.trainer      = 'yolo'
														
 
															+        self.optimizer    = 'adamw'
														
 
															+        self.base_lr      = 0.001     # base_lr = per_image_lr * batch_size
														
 
															+        self.min_lr_ratio = 0.01      # min_lr  = base_lr * min_lr_ratio
														
 
															+        self.batch_size_base = 64
														
 
															+        self.momentum     = 0.9
														
 
															+        self.weight_decay = 0.05
														
 
															+        self.clip_max_norm   = 35.0
														
 
															+        self.warmup_bias_lr  = 0.1
														
 
															+        self.warmup_momentum = 0.8
														
 
															+
														
 
															+        # ---------------- Lr Scheduler config ----------------
														
 
															+        self.warmup_epoch = 3
														
 
															+        self.lr_scheduler = "cosine"
														
 
															+        self.max_epoch    = 150
														
 
															+        self.eval_epoch   = 10
														
 
															+        self.no_aug_epoch = -1
														
 
															+
														
 
															+        # ---------------- Data process config ----------------
														
 
															+        self.aug_type = 'yolo'
														
 
															+        self.mosaic_prob = 0.0
														
 
															+        self.mixup_prob  = 0.0
														
 
															+        self.copy_paste  = 0.0          # approximated by the YOLOX's mixup
														
 
															+        self.multi_scale = [0.5, 1.5]   # multi scale: [img_size * 0.5, img_size * 1.5]
														
 
															+        ## Pixel mean & std
														
 
															+        self.pixel_mean = [0., 0., 0.]
														
 
															+        self.pixel_std  = [255., 255., 255.]
														
 
															+        ## Transforms
														
 
															+        self.train_img_size = 640
														
 
															+        self.test_img_size  = 640
														
 
															+        self.affine_params = {
														
 
															+            'degrees': 0.0,
														
 
															+            'translate': 0.1,
														
 
															+            'scale': [0.5, 1.5],
														
 
															+            'shear': 0.0,
														
 
															+            'perspective': 0.0,
														
 
															+            'hsv_h': 0.015,
														
 
															+            'hsv_s': 0.7,
														
 
															+            'hsv_v': 0.4,
														
 
															+        }
														
 
															+
														
 
															+    def print_config(self):
														
 
															+        config_dict = {key: value for key, value in self.__dict__.items() if not key.startswith('__')}
														
 
															+        for k, v in config_dict.items():
														
 
															+            print("{} : {}".format(k, v))
														
 
															+
														
 
															+# YOLOv2-R18
														
 
															+class FcosR18Config(FcosBaseConfig):
														
 
															+    def __init__(self) -> None:
														
 
															+        super().__init__()
														
 
															+        self.backbone = 'resnet18'
														
 
															+
														
 
															+# YOLOv2-R50
														
 
															+class FcosR50Config(FcosBaseConfig):
														
 
															+    def __init__(self) -> None:
														
 
															+        super().__init__()
														
 
															+        # TODO: Try your best.
														
--- a/yolo/config/yolof_config.py
+++ b/yolo/config/yolof_config.py
@@ -0,0 +1,115 @@
 
															+# Yolof Config
														
 
															+
														
 
															+
														
 
															+def build_yolof_config(args):
														
 
															+    if   args.model == 'yolof_r18':
														
 
															+        return YolofR18Config()
														
 
															+    elif args.model == 'yolof_r50':
														
 
															+        return YolofR50Config()
														
 
															+    else:
														
 
															+        raise NotImplementedError("No config for model: {}".format(args.model))
														
 
															+    
														
 
															+# Fcos-Base config
														
 
															+class YolofBaseConfig(object):
														
 
															+    def __init__(self) -> None:
														
 
															+        # ---------------- Model config ----------------
														
 
															+        self.out_stride = 32
														
 
															+        self.max_stride = 32
														
 
															+        ## Backbone
														
 
															+        self.backbone = 'resnet50'
														
 
															+        self.use_pretrained = True
														
 
															+        ## Encoder
														
 
															+        self.neck_expand_ratio = 0.25
														
 
															+        self.neck_dilations = [2, 4, 6, 8]
														
 
															+        ## Head
														
 
															+        self.head_dim  = 512
														
 
															+        self.num_cls_head = 2
														
 
															+        self.num_reg_head = 4
														
 
															+
														
 
															+        # ---------------- Post-process config ----------------
														
 
															+        ## Post process
														
 
															+        self.val_topk = 1000
														
 
															+        self.val_conf_thresh = 0.05
														
 
															+        self.val_nms_thresh  = 0.6
														
 
															+        self.test_topk = 300
														
 
															+        self.test_conf_thresh = 0.3
														
 
															+        self.test_nms_thresh  = 0.45
														
 
															+
														
 
															+        # ---------------- Assignment config ----------------
														
 
															+        ## Matcher
														
 
															+        self.center_clamp = 32
														
 
															+        self.match_topk_candidates = 4
														
 
															+        self.match_iou_thresh = 0.15
														
 
															+        self.ignore_thresh = 0.7
														
 
															+        self.anchor_size  = [[32, 32], [64, 64], [128, 128], [256, 256], [512, 512]]
														
 
															+
														
 
															+        ## Loss weight
														
 
															+        self.focal_loss_alpha = 0.25
														
 
															+        self.focal_loss_gamma = 2.0
														
 
															+        self.loss_cls = 1.0
														
 
															+        self.loss_reg = 1.0
														
 
															+        self.loss_ctn = 1.0
														
 
															+
														
 
															+        # ---------------- ModelEMA config ----------------
														
 
															+        self.use_ema   = True
														
 
															+        self.ema_decay = 0.9998
														
 
															+        self.ema_tau   = 2000
														
 
															+
														
 
															+        # ---------------- Optimizer config ----------------
														
 
															+        self.trainer      = 'yolo'
														
 
															+        self.optimizer    = 'adamw'
														
 
															+        self.base_lr      = 0.001     # base_lr = per_image_lr * batch_size
														
 
															+        self.min_lr_ratio = 0.01      # min_lr  = base_lr * min_lr_ratio
														
 
															+        self.batch_size_base = 64
														
 
															+        self.momentum     = 0.9
														
 
															+        self.weight_decay = 0.05
														
 
															+        self.clip_max_norm   = 35.0
														
 
															+        self.warmup_bias_lr  = 0.1
														
 
															+        self.warmup_momentum = 0.8
														
 
															+
														
 
															+        # ---------------- Lr Scheduler config ----------------
														
 
															+        self.warmup_epoch = 3
														
 
															+        self.lr_scheduler = "cosine"
														
 
															+        self.max_epoch    = 150
														
 
															+        self.eval_epoch   = 10
														
 
															+        self.no_aug_epoch = -1
														
 
															+
														
 
															+        # ---------------- Data process config ----------------
														
 
															+        self.aug_type = 'yolo'
														
 
															+        self.mosaic_prob = 0.0
														
 
															+        self.mixup_prob  = 0.0
														
 
															+        self.copy_paste  = 0.0          # approximated by the YOLOX's mixup
														
 
															+        self.multi_scale = [0.5, 1.5]   # multi scale: [img_size * 0.5, img_size * 1.5]
														
 
															+        ## Pixel mean & std
														
 
															+        self.pixel_mean = [0., 0., 0.]
														
 
															+        self.pixel_std  = [255., 255., 255.]
														
 
															+        ## Transforms
														
 
															+        self.train_img_size = 640
														
 
															+        self.test_img_size  = 640
														
 
															+        self.affine_params = {
														
 
															+            'degrees': 0.0,
														
 
															+            'translate': 0.1,
														
 
															+            'scale': [0.5, 1.5],
														
 
															+            'shear': 0.0,
														
 
															+            'perspective': 0.0,
														
 
															+            'hsv_h': 0.015,
														
 
															+            'hsv_s': 0.7,
														
 
															+            'hsv_v': 0.4,
														
 
															+        }
														
 
															+
														
 
															+    def print_config(self):
														
 
															+        config_dict = {key: value for key, value in self.__dict__.items() if not key.startswith('__')}
														
 
															+        for k, v in config_dict.items():
														
 
															+            print("{} : {}".format(k, v))
														
 
															+
														
 
															+# YOLOv2-R18
														
 
															+class YolofR18Config(YolofBaseConfig):
														
 
															+    def __init__(self) -> None:
														
 
															+        super().__init__()
														
 
															+        self.backbone = 'resnet18'
														
 
															+
														
 
															+# YOLOv2-R50
														
 
															+class YolofR50Config(YolofBaseConfig):
														
 
															+    def __init__(self) -> None:
														
 
															+        super().__init__()
														
 
															+        # TODO: Try your best.
														
--- a/yolo/models/__init__.py
+++ b/yolo/models/__init__.py
@@ -12,6 +12,9 @@ from .yolov6.build import build_yolov6
 
															 from .yolov7.build import build_yolov7
														
 
															 from .yolov8.build import build_yolov8
														
 
															 from .yolov9.build import build_gelan
														
 
															+
														
 
															+from .yolof.build  import build_yolof
														
 
															+from .fcos.build   import build_fcos
														
 
															 from .rtdetr.build import build_rtdetr
														
@@ -48,6 +51,13 @@ def build_model(args, cfg, is_val=False):
 
															     ## GElan
														
 
															     elif 'yolov9' in args.model:
														
 
															         model, criterion = build_gelan(cfg, is_val)
														
 
															+
														
 
															+    ## Yolof
														
 
															+    elif 'yolof' in args.model:
														
 
															+        model, criterion = build_yolof(cfg, is_val)
														
 
															+    ## Fcos
														
 
															+    elif 'fcos' in args.model:
														
 
															+        model, criterion = build_fcos(cfg, is_val)
														
 
															     ## RT-DETR
														
 
															     elif 'rtdetr' in args.model:
														
 
															         model, criterion = build_rtdetr(cfg, is_val)
														
--- a/yolo/models/fcos/build.py
+++ b/yolo/models/fcos/build.py
@@ -0,0 +1,16 @@
 
															+from .loss import SetCriterion
														
 
															+from .fcos import Fcos
														
 
															+
														
 
															+
														
 
															+# build object detector
														
 
															+def build_fcos(cfg, is_val=False):
														
 
															+    # -------------- Build YOLO --------------
														
 
															+    model = Fcos(cfg, is_val)
														
 
															+  
														
 
															+    # -------------- Build criterion --------------
														
 
															+    criterion = None
														
 
															+    if is_val:
														
 
															+        # build criterion for training
														
 
															+        criterion = SetCriterion(cfg)
														
 
															+        
														
 
															+    return model, criterion
														
--- a/yolo/models/fcos/fcos.py
+++ b/yolo/models/fcos/fcos.py
@@ -0,0 +1,115 @@
 
															+import torch
														
 
															+import torch.nn as nn
														
 
															+
														
 
															+# --------------- Model components ---------------
														
 
															+from .fcos_backbone import FcosBackbone
														
 
															+from .fcos_fpn import FcosFPN
														
 
															+from .fcos_head import FcosHead
														
 
															+
														
 
															+# --------------- External components ---------------
														
 
															+from utils.misc import multiclass_nms
														
 
															+
														
 
															+
														
 
															+# ------------------------ Fully Convolutional One-Stage Detector ------------------------
														
 
															+class Fcos(nn.Module):
														
 
															+    def __init__(self, 
														
 
															+                 cfg,
														
 
															+                 is_val = False,
														
 
															+                 ) -> None:
														
 
															+        super(Fcos, self).__init__()
														
 
															+        # ---------------------- Basic setting ----------------------
														
 
															+        self.cfg = cfg
														
 
															+        self.num_classes = cfg.num_classes
														
 
															+        ## Post-process parameters
														
 
															+        self.topk_candidates  = cfg.val_topk        if is_val else cfg.test_topk
														
 
															+        self.conf_thresh      = cfg.val_conf_thresh if is_val else cfg.test_conf_thresh
														
 
															+        self.nms_thresh       = cfg.val_nms_thresh  if is_val else cfg.test_nms_thresh
														
 
															+        self.no_multi_labels  = False if is_val else True
														
 
															+
														
 
															+        # ---------------------- Network Parameters ----------------------
														
 
															+        self.backbone = FcosBackbone(cfg)
														
 
															+        self.fpn      = FcosFPN(cfg, self.backbone.feat_dims[-3:])
														
 
															+        self.head     = FcosHead(cfg, self.fpn.out_dim)
														
 
															+
														
 
															+    def post_process(self, cls_preds, ctn_preds, box_preds):
														
 
															+        """
														
 
															+        Input:
														
 
															+            cls_preds: List(Tensor) [[B, H x W, C], ...]
														
 
															+            ctn_preds: List(Tensor) [[B, H x W, 1], ...]
														
 
															+            box_preds: List(Tensor) [[B, H x W, 4], ...]
														
 
															+        """
														
 
															+        all_scores = []
														
 
															+        all_labels = []
														
 
															+        all_bboxes = []
														
 
															+        
														
 
															+        for cls_pred_i, ctn_pred_i, box_pred_i in zip(cls_preds, ctn_preds, box_preds):
														
 
															+            cls_pred_i = cls_pred_i[0]
														
 
															+            ctn_pred_i = ctn_pred_i[0]
														
 
															+            box_pred_i = box_pred_i[0]
														
 
															+            
														
 
															+            # (H x W x C,)
														
 
															+            scores_i = torch.sqrt(cls_pred_i.sigmoid() * ctn_pred_i.sigmoid()).flatten()
														
 
															+
														
 
															+            # Keep top k top scoring indices only.
														
 
															+            num_topk = min(self.topk_candidates, box_pred_i.size(0))
														
 
															+
														
 
															+            # torch.sort is actually faster than .topk (at least on GPUs)
														
 
															+            predicted_prob, topk_idxs = scores_i.sort(descending=True)
														
 
															+            topk_scores = predicted_prob[:num_topk]
														
 
															+            topk_idxs = topk_idxs[:num_topk]
														
 
															+
														
 
															+            # filter out the proposals with low confidence score
														
 
															+            keep_idxs = topk_scores > self.conf_thresh
														
 
															+            topk_idxs = topk_idxs[keep_idxs]
														
 
															+
														
 
															+            # final scores
														
 
															+            scores = topk_scores[keep_idxs]
														
 
															+            # final labels
														
 
															+            labels = topk_idxs % self.num_classes
														
 
															+            # final bboxes
														
 
															+            anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
														
 
															+            bboxes = box_pred_i[anchor_idxs]
														
 
															+
														
 
															+            all_scores.append(scores)
														
 
															+            all_labels.append(labels)
														
 
															+            all_bboxes.append(bboxes)
														
 
															+
														
 
															+        scores = torch.cat(all_scores)
														
 
															+        labels = torch.cat(all_labels)
														
 
															+        bboxes = torch.cat(all_bboxes)
														
 
															+
														
 
															+        # to cpu & numpy
														
 
															+        scores = scores.cpu().numpy()
														
 
															+        labels = labels.cpu().numpy()
														
 
															+        bboxes = bboxes.cpu().numpy()
														
 
															+
														
 
															+        # nms
														
 
															+        scores, labels, bboxes = multiclass_nms(
														
 
															+            scores, labels, bboxes, self.nms_thresh, self.num_classes)
														
 
															+
														
 
															+        return bboxes, scores, labels
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        # ---------------- Backbone ----------------
														
 
															+        pyramid_feats = self.backbone(x)
														
 
															+
														
 
															+        # ---------------- Neck ----------------
														
 
															+        pyramid_feats = self.fpn(pyramid_feats)
														
 
															+
														
 
															+        # ---------------- Heads ----------------
														
 
															+        outputs = self.head(pyramid_feats)
														
 
															+
														
 
															+        if not self.training:
														
 
															+            # ---------------- PostProcess ----------------
														
 
															+            cls_pred = outputs["pred_cls"]
														
 
															+            ctn_pred = outputs["pred_ctn"]
														
 
															+            box_pred = outputs["pred_box"]
														
 
															+            bboxes, scores, labels = self.post_process(cls_pred, ctn_pred, box_pred)
														
 
															+
														
 
															+            outputs = {
														
 
															+                'scores': scores,
														
 
															+                'labels': labels,
														
 
															+                'bboxes': bboxes
														
 
															+            }
														
 
															+
														
 
															+        return outputs 
														
--- a/yolo/models/fcos/fcos_backbone.py
+++ b/yolo/models/fcos/fcos_backbone.py
@@ -0,0 +1,52 @@
 
															+import torch
														
 
															+import torch.nn as nn
														
 
															+
														
 
															+try:
														
 
															+    from .resnet import build_resnet
														
 
															+except:
														
 
															+    from  resnet import build_resnet
														
 
															+
														
 
															+
														
 
															+# --------------------- Yolov1's Backbone -----------------------
														
 
															+class FcosBackbone(nn.Module):
														
 
															+    def __init__(self, cfg):
														
 
															+        super().__init__()
														
 
															+        self.backbone, self.feat_dims = build_resnet(cfg.backbone, cfg.use_pretrained)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        pyramid_feats = self.backbone(x)
														
 
															+
														
 
															+        return pyramid_feats # [C3, C4, C5]
														
 
															+
														
 
															+
														
 
															+if __name__=='__main__':
														
 
															+    from thop import profile
														
 
															+
														
 
															+    # YOLOv1 configuration
														
 
															+    class FcosBaseConfig(object):
														
 
															+        def __init__(self) -> None:
														
 
															+            # ---------------- Model config ----------------
														
 
															+            self.out_stride = [8, 16, 32]
														
 
															+            self.max_stride = 32
														
 
															+            ## Backbone
														
 
															+            self.backbone = 'resnet18'
														
 
															+            self.use_pretrained = True
														
 
															+    cfg = FcosBaseConfig()
														
 
															+
														
 
															+    # Build backbone
														
 
															+    model = FcosBackbone(cfg)
														
 
															+
														
 
															+    # Randomly generate a input data
														
 
															+    x = torch.randn(2, 3, 640, 640)
														
 
															+
														
 
															+    # Inference
														
 
															+    outputs = model(x)
														
 
															+    print(' - the shape of input :  ', x.shape)
														
 
															+    for i, out in enumerate(outputs):
														
 
															+        print(f' - the shape of level-{i} output : ', out.shape)
														
 
															+
														
 
															+    x = torch.randn(1, 3, 640, 640)
														
 
															+    flops, params = profile(model, inputs=(x, ), verbose=False)
														
 
															+    print('============== FLOPs & Params ================')
														
 
															+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
														
 
															+    print(' - Params : {:.2f} M'.format(params / 1e6))
														
--- a/yolo/models/fcos/fcos_fpn.py
+++ b/yolo/models/fcos/fcos_fpn.py
@@ -1,68 +1,88 @@
 
															 import torch
														
 
															 import torch.nn as nn
														
 
															 import torch.nn.functional as F
														
 
															+from typing import List
														
 
															+
														
 
															 # ------------------ Basic Feature Pyramid Network ------------------
														
 
															-class BasicFPN(nn.Module):
														
 
															-    def __init__(self, cfg, 
														
 
															-                 in_dims=[512, 1024, 2048],
														
 
															-                 out_dim=256,
														
 
															-                 ):
														
 
															+class FcosFPN(nn.Module):
														
 
															+    def __init__(self, cfg, in_dims: List = [512, 1024, 2048]):
														
 
															         super().__init__()
														
 
															         # ------------------ Basic parameters -------------------
														
 
															-        self.p6_feat = cfg.fpn_p6_feat
														
 
															-        self.p7_feat = cfg.fpn_p7_feat
														
 
															-        self.from_c5 = cfg.fpn_p6_from_c5
														
 
															+        self.out_dim = cfg.head_dim
														
 
															         # ------------------ Network parameters -------------------
														
 
															-        ## latter layers
														
 
															-        self.input_projs = nn.ModuleList()
														
 
															-        self.smooth_layers = nn.ModuleList()
														
 
															-        for in_dim in in_dims[::-1]:
														
 
															-            self.input_projs.append(nn.Conv2d(in_dim, out_dim, kernel_size=1))
														
 
															-            self.smooth_layers.append(nn.Conv2d(out_dim, out_dim, kernel_size=3, padding=1))
														
 
															+        self.input_proj_1 = nn.Conv2d(in_dims[0], self.out_dim, kernel_size=1)
														
 
															+        self.input_proj_2 = nn.Conv2d(in_dims[1], self.out_dim, kernel_size=1)
														
 
															+        self.input_proj_3 = nn.Conv2d(in_dims[2], self.out_dim, kernel_size=1)
														
 
															+
														
 
															+        self.smooth_layer_1 = nn.Conv2d(self.out_dim, self.out_dim, kernel_size=3, padding=1, stride=1)
														
 
															+        self.smooth_layer_2 = nn.Conv2d(self.out_dim, self.out_dim, kernel_size=3, padding=1, stride=1)
														
 
															+        self.smooth_layer_3 = nn.Conv2d(self.out_dim, self.out_dim, kernel_size=3, padding=1, stride=1)
														
 
															-        ## P6/P7 layers
														
 
															-        if self.p6_feat:
														
 
															-            if self.from_c5:
														
 
															-                self.p6_conv = nn.Conv2d(in_dims[-1], out_dim, kernel_size=3, stride=2, padding=1)
														
 
															-            else: # from p5
														
 
															-                self.p6_conv = nn.Conv2d(out_dim, out_dim, kernel_size=3, stride=2, padding=1)
														
 
															-        if self.p7_feat:
														
 
															-            self.p7_conv = nn.Sequential(
														
 
															-                nn.ReLU(inplace=True),
														
 
															-                nn.Conv2d(out_dim, out_dim, kernel_size=3, stride=2, padding=1)
														
 
															-            )
														
 
															+        self.p6_conv = nn.Conv2d(self.out_dim, self.out_dim, kernel_size=3, stride=2, padding=1)
														
 
															     def forward(self, feats):
														
 
															         """
														
 
															-            feats: (List of Tensor) [C3, C4, C5], C_i ∈ R^(B x C_i x H_i x W_i)
														
 
															+            feats: (List of Tensor) [C3, C4, C5]
														
 
															         """
														
 
															-        outputs = []
														
 
															-        # [C3, C4, C5] -> [C5, C4, C3]
														
 
															-        feats = feats[::-1]
														
 
															-        top_level_feat = feats[0]
														
 
															-        prev_feat = self.input_projs[0](top_level_feat)
														
 
															-        outputs.append(self.smooth_layers[0](prev_feat))
														
 
															+        c3, c4, c5 = feats
														
 
															-        for feat, input_proj, smooth_layer in zip(feats[1:], self.input_projs[1:], self.smooth_layers[1:]):
														
 
															-            feat = input_proj(feat)
														
 
															-            top_down_feat = F.interpolate(prev_feat, size=feat.shape[2:], mode='nearest')
														
 
															-            prev_feat = feat + top_down_feat
														
 
															-            outputs.insert(0, smooth_layer(prev_feat))
														
 
															+        # -------- Input projection --------
														
 
															+        p3 = self.input_proj_1(c3)
														
 
															+        p4 = self.input_proj_2(c4)
														
 
															+        p5 = self.input_proj_3(c5)
														
 
															+        
														
 
															+        # -------- Feature fusion --------
														
 
															+        outputs = [self.smooth_layer_3(p5)]
														
 
															+        # P5 -> P4
														
 
															+        p4 = p4 + F.interpolate(p5, size=p4.shape[2:], mode='nearest')
														
 
															+        outputs.insert(0, self.smooth_layer_2(p4))
														
 
															-        if self.p6_feat:
														
 
															-            if self.from_c5:
														
 
															-                p6_feat = self.p6_conv(feats[0])
														
 
															-            else:
														
 
															-                p6_feat = self.p6_conv(outputs[-1])
														
 
															-            # [P3, P4, P5] -> [P3, P4, P5, P6]
														
 
															-            outputs.append(p6_feat)
														
 
															+        # P4 -> P3
														
 
															+        p3 = p3 + F.interpolate(p4, size=p3.shape[2:], mode='nearest')
														
 
															+        outputs.insert(0, self.smooth_layer_1(p3))
														
 
															-            if self.p7_feat:
														
 
															-                p7_feat = self.p7_conv(p6_feat)
														
 
															-                # [P3, P4, P5, P6] -> [P3, P4, P5, P6, P7]
														
 
															-                outputs.append(p7_feat)
														
 
															+        # P5 -> P6
														
 
															+        outputs.append(self.p6_conv(outputs[-1]))
														
 
															-        # [P3, P4, P5] or [P3, P4, P5, P6, P7]
														
 
															+        # [P3, P4, P5, P6]
														
 
															         return outputs
														
 
															+
														
 
															+
														
 
															+if __name__=='__main__':
														
 
															+    import time
														
 
															+    from thop import profile
														
 
															+    # Model config
														
 
															+    
														
 
															+    # YOLOv2-Base config
														
 
															+    class FcosBaseConfig(object):
														
 
															+        def __init__(self) -> None:
														
 
															+            # ---------------- Model config ----------------
														
 
															+            self.width    = 0.50
														
 
															+            self.depth    = 0.34
														
 
															+            self.out_stride = [8, 16, 32, 64]
														
 
															+            ## Head
														
 
															+            self.head_dim = 256
														
 
															+
														
 
															+    cfg = FcosBaseConfig()
														
 
															+    # Build a head
														
 
															+    in_dims  = [128, 256, 512]
														
 
															+    fpn = FcosFPN(cfg, in_dims)
														
 
															+
														
 
															+    # Inference
														
 
															+    x = [torch.randn(1, in_dims[0], 80, 80),
														
 
															+         torch.randn(1, in_dims[1], 40, 40),
														
 
															+         torch.randn(1, in_dims[2], 20, 20)]
														
 
															+    t0 = time.time()
														
 
															+    output = fpn(x)
														
 
															+    t1 = time.time()
														
 
															+    print('Time: ', t1 - t0)
														
 
															+    print('====== FPN output ====== ')
														
 
															+    for level, feat in enumerate(output):
														
 
															+        print("- Level-{} : ".format(level), feat.shape)
														
 
															+
														
 
															+    flops, params = profile(fpn, inputs=(x, ), verbose=False)
														
 
															+    print('==============================')
														
 
															+    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
														
 
															+    print('Params : {:.2f} M'.format(params / 1e6))
														
--- a/yolo/models/fcos/fcos_head.py
+++ b/yolo/models/fcos/fcos_head.py
@@ -1,7 +1,11 @@
 
															 import torch
														
 
															 import torch.nn as nn
														
 
															+import torch.nn.functional as F
														
 
															-from .modules import BasicConv
														
 
															+try:
														
 
															+    from .modules import ConvModule
														
 
															+except:
														
 
															+    from  modules import ConvModule
														
 
															 class Scale(nn.Module):
														
@@ -25,53 +29,34 @@ class Scale(nn.Module):
 
															         return x * self.scale
														
 
															 class FcosHead(nn.Module):
														
 
															-    def __init__(self, cfg, in_dim, out_dim,):
														
 
															+    def __init__(self, cfg, in_dim: int = 256,):
														
 
															         super().__init__()
														
 
															-        self.fmp_size = None
														
 
															         # ------------------ Basic parameters -------------------
														
 
															         self.cfg = cfg
														
 
															         self.in_dim = in_dim
														
 
															-        self.stride       = cfg.out_stride
														
 
															-        self.num_classes  = cfg.num_classes
														
 
															-        self.num_cls_head = cfg.num_cls_head
														
 
															-        self.num_reg_head = cfg.num_reg_head
														
 
															-        self.act_type     = cfg.head_act
														
 
															-        self.norm_type    = cfg.head_norm
														
 
															+        self.out_dim = cfg.head_dim
														
 
															+        self.out_stride  = cfg.out_stride
														
 
															+        self.num_classes = cfg.num_classes
														
 
															         # ------------------ Network parameters -------------------
														
 
															-        ## cls head
														
 
															+        ## classification head
														
 
															         cls_heads = []
														
 
															-        self.cls_head_dim = out_dim
														
 
															-        for i in range(self.num_cls_head):
														
 
															+        self.cls_head_dim = cfg.head_dim
														
 
															+        for i in range(cfg.num_cls_head):
														
 
															             if i == 0:
														
 
															-                cls_heads.append(
														
 
															-                    BasicConv(in_dim, self.cls_head_dim,
														
 
															-                              kernel_size=3, padding=1, stride=1, 
														
 
															-                              act_type=self.act_type, norm_type=self.norm_type)
														
 
															-                              )
														
 
															+                cls_heads.append(ConvModule(in_dim, self.cls_head_dim, kernel_size=3, padding=1, stride=1))
														
 
															             else:
														
 
															-                cls_heads.append(
														
 
															-                    BasicConv(self.cls_head_dim, self.cls_head_dim,
														
 
															-                              kernel_size=3, padding=1, stride=1, 
														
 
															-                              act_type=self.act_type, norm_type=self.norm_type)
														
 
															-                              )
														
 
															+                cls_heads.append(ConvModule(self.cls_head_dim, self.cls_head_dim, kernel_size=3, padding=1, stride=1))
														
 
															-        ## reg head
														
 
															+        ## bbox regression head
														
 
															         reg_heads = []
														
 
															-        self.reg_head_dim = out_dim
														
 
															-        for i in range(self.num_reg_head):
														
 
															+        self.reg_head_dim = cfg.head_dim
														
 
															+        for i in range(cfg.num_reg_head):
														
 
															             if i == 0:
														
 
															-                reg_heads.append(
														
 
															-                    BasicConv(in_dim, self.reg_head_dim,
														
 
															-                              kernel_size=3, padding=1, stride=1, 
														
 
															-                              act_type=self.act_type, norm_type=self.norm_type)
														
 
															-                              )
														
 
															+                reg_heads.append(ConvModule(in_dim, self.reg_head_dim, kernel_size=3, padding=1, stride=1))
														
 
															             else:
														
 
															-                reg_heads.append(
														
 
															-                    BasicConv(self.reg_head_dim, self.reg_head_dim,
														
 
															-                              kernel_size=3, padding=1, stride=1, 
														
 
															-                              act_type=self.act_type, norm_type=self.norm_type)
														
 
															-                              )
														
 
															+                reg_heads.append(ConvModule(self.reg_head_dim, self.reg_head_dim, kernel_size=3, padding=1, stride=1))
														
 
															+        
														
 
															         self.cls_heads = nn.Sequential(*cls_heads)
														
 
															         self.reg_heads = nn.Sequential(*reg_heads)
														
@@ -82,7 +67,7 @@ class FcosHead(nn.Module):
 
															         ## scale layers
														
 
															         self.scales = nn.ModuleList(
														
 
															-            Scale() for _ in range(len(self.stride))
														
 
															+            Scale() for _ in range(len(self.out_stride))
														
 
															         )
														
 
															         # init bias
														
@@ -112,8 +97,9 @@ class FcosHead(nn.Module):
 
															         fmp_h, fmp_w = fmp_size
														
 
															         anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
														
 
															         # [H, W, 2] -> [HW, 2]
														
 
															-        anchors = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2) + 0.5
														
 
															-        anchors *= self.stride[level]
														
 
															+        anchors = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2)
														
 
															+        anchors += 0.5
														
 
															+        anchors *= self.out_stride[level]
														
 
															         return anchors
														
@@ -130,8 +116,7 @@ class FcosHead(nn.Module):
 
															         return pred_box
														
 
															-    def forward(self, pyramid_feats, mask=None):
														
 
															-        all_masks = []
														
 
															+    def forward(self, pyramid_feats):
														
 
															         all_anchors = []
														
 
															         all_cls_preds = []
														
 
															         all_reg_preds = []
														
@@ -158,16 +143,10 @@ class FcosHead(nn.Module):
 
															             cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_classes)
														
 
															             ctn_pred = ctn_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 1)
														
 
															             reg_pred = reg_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 4)
														
 
															-            reg_pred = nn.functional.relu(self.scales[level](reg_pred)) * self.stride[level]
														
 
															+            reg_pred = F.relu(self.scales[level](reg_pred)) * self.out_stride[level]
														
 
															+
														
 
															             ## Decode bbox
														
 
															             box_pred = self.decode_boxes(reg_pred, anchors)
														
 
															-            ## Adjust mask
														
 
															-            if mask is not None:
														
 
															-                # [B, H, W]
														
 
															-                mask_i = torch.nn.functional.interpolate(mask[None].float(), size=[H, W]).bool()[0]
														
 
															-                # [B, H, W] -> [B, M]
														
 
															-                mask_i = mask_i.flatten(1)     
														
 
															-                all_masks.append(mask_i)
														
 
															             all_anchors.append(anchors)
														
 
															             all_cls_preds.append(cls_pred)
														
@@ -180,7 +159,52 @@ class FcosHead(nn.Module):
 
															                    "pred_box": all_box_preds,  # List [B, M, 4]
														
 
															                    "pred_ctn": all_ctn_preds,  # List [B, M, 1]
														
 
															                    "anchors": all_anchors,     # List [B, M, 2]
														
 
															-                   "strides": self.stride,
														
 
															-                   "mask": all_masks}          # List [B, M,]
														
 
															+                   "strides": self.out_stride,
														
 
															+                   }
														
 
															         return outputs 
														
 
															+
														
 
															+
														
 
															+if __name__=='__main__':
														
 
															+    import time
														
 
															+    from thop import profile
														
 
															+    # Model config
														
 
															+    
														
 
															+    # YOLOv3-Base config
														
 
															+    class FcosBaseConfig(object):
														
 
															+        def __init__(self) -> None:
														
 
															+            # ---------------- Model config ----------------
														
 
															+            self.width = 0.50
														
 
															+            self.depth = 0.34
														
 
															+
														
 
															+            self.out_stride = [8, 16, 32, 64]
														
 
															+            self.num_classes = 20
														
 
															+
														
 
															+            ## Head
														
 
															+            self.head_dim  = 256
														
 
															+            self.num_cls_head = 4
														
 
															+            self.num_reg_head = 4
														
 
															+
														
 
															+    cfg = FcosBaseConfig()
														
 
															+    feat_dim = 256
														
 
															+    pyramid_feats = [torch.randn(1, feat_dim, 80, 80),
														
 
															+                     torch.randn(1, feat_dim, 40, 40),
														
 
															+                     torch.randn(1, feat_dim, 20, 20),
														
 
															+                     torch.randn(1, feat_dim, 10, 10)]
														
 
															+
														
 
															+    # Build a head
														
 
															+    head = FcosHead(cfg, feat_dim)
														
 
															+
														
 
															+    # Inference
														
 
															+    t0 = time.time()
														
 
															+    outputs = head(pyramid_feats)
														
 
															+    t1 = time.time()
														
 
															+    print('Time: ', t1 - t0)
														
 
															+    print("====== FCOS Head output ======")
														
 
															+    for k in outputs:
														
 
															+        print(f" - shape of {k}: ", outputs[k].shape )
														
 
															+
														
 
															+    flops, params = profile(head, inputs=(pyramid_feats, ), verbose=False)
														
 
															+    print('==============================')
														
 
															+    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
														
 
															+    print('Params : {:.2f} M'.format(params / 1e6))
														
--- a/yolo/models/fcos/loss.py
+++ b/yolo/models/fcos/loss.py
@@ -2,42 +2,33 @@ import torch
 
															 import torch.nn as nn
														
 
															 import torch.nn.functional as F
														
 
															-from utils.box_ops import get_ious
														
 
															 from utils.misc import sigmoid_focal_loss
														
 
															 from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized
														
 
															-from .matcher import FcosMatcher, AlignedOTAMatcher
														
 
															+from .matcher import FcosMatcher
														
 
															-class SetCriterion(nn.Module):
														
 
															+class SetCriterion(object):
														
 
															     def __init__(self, cfg):
														
 
															-        super().__init__()
														
 
															         # ------------- Basic parameters -------------
														
 
															         self.cfg = cfg
														
 
															         self.num_classes = cfg.num_classes
														
 
															+
														
 
															         # ------------- Focal loss -------------
														
 
															         self.alpha = cfg.focal_loss_alpha
														
 
															         self.gamma = cfg.focal_loss_gamma
														
 
															+
														
 
															         # ------------- Loss weight -------------
														
 
															-        # ------------- Matcher & Loss weight -------------
														
 
															-        self.matcher_cfg = cfg.matcher_hpy
														
 
															-        if cfg.matcher == 'fcos_matcher':
														
 
															-            self.weight_dict = {'loss_cls': cfg.loss_cls_weight,
														
 
															-                                'loss_reg': cfg.loss_reg_weight,
														
 
															-                                'loss_ctn': cfg.loss_ctn_weight}
														
 
															-            self.matcher = FcosMatcher(cfg.num_classes,
														
 
															-                                       self.matcher_cfg['center_sampling_radius'],
														
 
															-                                       self.matcher_cfg['object_sizes_of_interest'],
														
 
															-                                       [1., 1., 1., 1.]
														
 
															-                                       )
														
 
															-        elif cfg.matcher == 'simota':
														
 
															-            self.weight_dict = {'loss_cls': cfg.loss_cls_weight,
														
 
															-                                'loss_reg': cfg.loss_reg_weight}
														
 
															-            self.matcher = AlignedOTAMatcher(cfg.num_classes,
														
 
															-                                             self.matcher_cfg['soft_center_radius'],
														
 
															-                                             self.matcher_cfg['topk_candidates'])
														
 
															-        else:
														
 
															-            raise NotImplementedError("Unknown matcher: {}.".format(cfg.matcher))
														
 
															+        self.weight_dict = {'loss_cls': cfg.loss_cls,
														
 
															+                            'loss_reg': cfg.loss_reg,
														
 
															+                            'loss_ctn': cfg.loss_ctn,}
														
 
															+        
														
 
															+        # ------------- Matcher -------------
														
 
															+        self.matcher = FcosMatcher(cfg.num_classes,
														
 
															+                                   center_sampling_radius=cfg.center_sampling_radius,
														
 
															+                                   object_sizes_of_interest=cfg.object_sizes_of_interest,
														
 
															+                                   box_weights=[1., 1., 1., 1.],
														
 
															+                                   )
														
 
															     def loss_labels(self, pred_cls, tgt_cls, num_boxes=1.0):
														
 
															         """
														
@@ -49,34 +40,7 @@ class SetCriterion(nn.Module):
 
															         return loss_cls.sum() / num_boxes
														
 
															-    def loss_labels_qfl(self, pred_cls, target, beta=2.0, num_boxes=1.0):
														
 
															-        # Quality FocalLoss
														
 
															-        """
														
 
															-            pred_cls: (torch.Tensor): [N, C]。
														
 
															-            target:   (tuple([torch.Tensor], [torch.Tensor])): label -> (N,), score -> (N)
														
 
															-        """
														
 
															-        label, score = target
														
 
															-        pred_sigmoid = pred_cls.sigmoid()
														
 
															-        scale_factor = pred_sigmoid
														
 
															-        zerolabel = scale_factor.new_zeros(pred_cls.shape)
														
 
															-
														
 
															-        ce_loss = F.binary_cross_entropy_with_logits(
														
 
															-            pred_cls, zerolabel, reduction='none') * scale_factor.pow(beta)
														
 
															-        
														
 
															-        bg_class_ind = pred_cls.shape[-1]
														
 
															-        pos = ((label >= 0) & (label < bg_class_ind)).nonzero().squeeze(1)
														
 
															-        if pos.shape[0] > 0:
														
 
															-            pos_label = label[pos].long()
														
 
															-
														
 
															-            scale_factor = score[pos] - pred_sigmoid[pos, pos_label]
														
 
															-
														
 
															-            ce_loss[pos, pos_label] = F.binary_cross_entropy_with_logits(
														
 
															-                pred_cls[pos, pos_label], score[pos],
														
 
															-                reduction='none') * scale_factor.abs().pow(beta)
														
 
															-
														
 
															-        return ce_loss.sum() / num_boxes
														
 
															-    
														
 
															-    def loss_bboxes_ltrb(self, pred_delta, tgt_delta, bbox_quality=None, num_boxes=1.0):
														
 
															+    def loss_bboxes(self, pred_delta, tgt_delta, bbox_quality=None, num_boxes=1.0):
														
 
															         """
														
 
															             pred_box: (Tensor) [N, 4]
														
 
															             tgt_box:  (Tensor) [N, 4]
														
@@ -114,16 +78,7 @@ class SetCriterion(nn.Module):
 
															         return loss_box.sum() / num_boxes
														
 
															-    def loss_bboxes_xyxy(self, pred_box, gt_box, num_boxes=1.0, box_weight=None):
														
 
															-        ious = get_ious(pred_box, gt_box, box_mode="xyxy", iou_type='giou')
														
 
															-        loss_box = 1.0 - ious
														
 
															-
														
 
															-        if box_weight is not None:
														
 
															-            loss_box = loss_box.squeeze(-1) * box_weight
														
 
															-
														
 
															-        return loss_box.sum() / num_boxes
														
 
															-    
														
 
															-    def fcos_loss(self, outputs, targets):
														
 
															+    def __call__(self, outputs, targets):
														
 
															         """
														
 
															             outputs['pred_cls']: (Tensor) [B, M, C]
														
 
															             outputs['pred_reg']: (Tensor) [B, M, 4]
														
@@ -137,10 +92,10 @@ class SetCriterion(nn.Module):
 
															         device = outputs['pred_cls'][0].device
														
 
															         fpn_strides = outputs['strides']
														
 
															         anchors = outputs['anchors']
														
 
															-        pred_cls = torch.cat(outputs['pred_cls'], dim=1).view(-1, self.num_classes)
														
 
															+
														
 
															+        pred_cls   = torch.cat(outputs['pred_cls'], dim=1).view(-1, self.num_classes)
														
 
															         pred_delta = torch.cat(outputs['pred_reg'], dim=1).view(-1, 4)
														
 
															-        pred_ctn = torch.cat(outputs['pred_ctn'], dim=1).view(-1, 1)
														
 
															-        masks = ~torch.cat(outputs['mask'], dim=1).view(-1)
														
 
															+        pred_ctn   = torch.cat(outputs['pred_ctn'], dim=1).view(-1, 1)
														
 
															         # -------------------- Label Assignment --------------------
														
 
															         gt_classes, gt_deltas, gt_centerness = self.matcher(fpn_strides, anchors, targets)
														
@@ -148,33 +103,31 @@ class SetCriterion(nn.Module):
 
															         gt_deltas = gt_deltas.view(-1, 4).to(device)
														
 
															         gt_centerness = gt_centerness.view(-1, 1).to(device)
														
 
															-        foreground_idxs = (gt_classes >= 0) & (gt_classes != self.num_classes)
														
 
															-        num_foreground = foreground_idxs.sum()
														
 
															+        fg_masks = (gt_classes >= 0) & (gt_classes != self.num_classes)
														
 
															+        num_fgs = fg_masks.sum()
														
 
															         if is_dist_avail_and_initialized():
														
 
															-            torch.distributed.all_reduce(num_foreground)
														
 
															-        num_foreground = torch.clamp(num_foreground / get_world_size(), min=1).item()
														
 
															+            torch.distributed.all_reduce(num_fgs)
														
 
															+        num_fgs = torch.clamp(num_fgs / get_world_size(), min=1).item()
														
 
															-        num_foreground_centerness = gt_centerness[foreground_idxs].sum()
														
 
															+        num_fgs_ctn = gt_centerness[fg_masks].sum()
														
 
															         if is_dist_avail_and_initialized():
														
 
															-            torch.distributed.all_reduce(num_foreground_centerness)
														
 
															-        num_targets = torch.clamp(num_foreground_centerness / get_world_size(), min=1).item()
														
 
															+            torch.distributed.all_reduce(num_fgs_ctn)
														
 
															+        num_targets = torch.clamp(num_fgs_ctn / get_world_size(), min=1).item()
														
 
															         # -------------------- classification loss --------------------
														
 
															         gt_classes_target = torch.zeros_like(pred_cls)
														
 
															-        gt_classes_target[foreground_idxs, gt_classes[foreground_idxs]] = 1
														
 
															-        valid_idxs = (gt_classes >= 0) & masks
														
 
															-        loss_labels = self.loss_labels(
														
 
															-            pred_cls[valid_idxs], gt_classes_target[valid_idxs], num_foreground)
														
 
															+        gt_classes_target[fg_masks, gt_classes[fg_masks]] = 1
														
 
															+        loss_labels = self.loss_labels(pred_cls, gt_classes_target, num_fgs)
														
 
															         # -------------------- regression loss --------------------
														
 
															-        loss_bboxes = self.loss_bboxes_ltrb(
														
 
															-            pred_delta[foreground_idxs], gt_deltas[foreground_idxs], gt_centerness[foreground_idxs], num_targets)
														
 
															+        loss_bboxes = self.loss_bboxes(
														
 
															+            pred_delta[fg_masks], gt_deltas[fg_masks], gt_centerness[fg_masks], num_targets)
														
 
															         # -------------------- centerness loss --------------------
														
 
															         loss_centerness = F.binary_cross_entropy_with_logits(
														
 
															-            pred_ctn[foreground_idxs],  gt_centerness[foreground_idxs], reduction='none')
														
 
															-        loss_centerness = loss_centerness.sum() / num_foreground
														
 
															+            pred_ctn[fg_masks],  gt_centerness[fg_masks], reduction='none')
														
 
															+        loss_centerness = loss_centerness.sum() / num_fgs
														
 
															         total_loss = loss_labels * self.weight_dict["loss_cls"] + \
														
 
															                      loss_bboxes * self.weight_dict["loss_reg"] + \
														
@@ -187,104 +140,3 @@ class SetCriterion(nn.Module):
 
															         )
														
 
															         return loss_dict
														
 
															-    
														
 
															-    def ota_loss(self, outputs, targets):
														
 
															-        """
														
 
															-            outputs['pred_cls']: (Tensor) [B, M, C]
														
 
															-            outputs['pred_reg']: (Tensor) [B, M, 4]
														
 
															-            outputs['pred_box']: (Tensor) [B, M, 4]
														
 
															-            outputs['strides']: (List) [8, 16, 32, ...] stride of the model output
														
 
															-            targets: (List) [dict{'boxes': [...], 
														
 
															-                                 'labels': [...], 
														
 
															-                                 'orig_size': ...}, ...]
														
 
															-        """
														
 
															-        # -------------------- Pre-process --------------------
														
 
															-        bs          = outputs['pred_cls'][0].shape[0]
														
 
															-        device      = outputs['pred_cls'][0].device
														
 
															-        fpn_strides = outputs['strides']
														
 
															-        anchors     = outputs['anchors']
														
 
															-        # preds: [B, M, C]
														
 
															-        # preds: [B, M, C]
														
 
															-        cls_preds = torch.cat(outputs['pred_cls'], dim=1)
														
 
															-        box_preds = torch.cat(outputs['pred_box'], dim=1)
														
 
															-        masks = ~torch.cat(outputs['mask'], dim=1).view(-1)
														
 
															-
														
 
															-        # -------------------- Label Assignment --------------------
														
 
															-        cls_targets = []
														
 
															-        box_targets = []
														
 
															-        assign_metrics = []
														
 
															-        for batch_idx in range(bs):
														
 
															-            tgt_labels = targets[batch_idx]["labels"].to(device)  # [N,]
														
 
															-            tgt_bboxes = targets[batch_idx]["boxes"].to(device)   # [N, 4]
														
 
															-            # refine target
														
 
															-            tgt_boxes_wh = tgt_bboxes[..., 2:] - tgt_bboxes[..., :2]
														
 
															-            min_tgt_size = torch.min(tgt_boxes_wh, dim=-1)[0]
														
 
															-            keep = (min_tgt_size >= 8)
														
 
															-            tgt_bboxes = tgt_bboxes[keep]
														
 
															-            tgt_labels = tgt_labels[keep]
														
 
															-            # label assignment
														
 
															-            assigned_result = self.matcher(fpn_strides=fpn_strides,
														
 
															-                                           anchors=anchors,
														
 
															-                                           pred_cls=cls_preds[batch_idx].detach(),
														
 
															-                                           pred_box=box_preds[batch_idx].detach(),
														
 
															-                                           gt_labels=tgt_labels,
														
 
															-                                           gt_bboxes=tgt_bboxes
														
 
															-                                           )
														
 
															-            cls_targets.append(assigned_result['assigned_labels'])
														
 
															-            box_targets.append(assigned_result['assigned_bboxes'])
														
 
															-            assign_metrics.append(assigned_result['assign_metrics'])
														
 
															-
														
 
															-        # List[B, M, C] -> Tensor[BM, C]
														
 
															-        cls_targets = torch.cat(cls_targets, dim=0)
														
 
															-        box_targets = torch.cat(box_targets, dim=0)
														
 
															-        assign_metrics = torch.cat(assign_metrics, dim=0)
														
 
															-
														
 
															-        valid_idxs = (cls_targets >= 0) & masks
														
 
															-        foreground_idxs = (cls_targets >= 0) & (cls_targets != self.num_classes)
														
 
															-        num_fgs = assign_metrics.sum()
														
 
															-
														
 
															-        if is_dist_avail_and_initialized():
														
 
															-            torch.distributed.all_reduce(num_fgs)
														
 
															-        num_fgs = torch.clamp(num_fgs / get_world_size(), min=1).item()
														
 
															-
														
 
															-        # -------------------- classification loss --------------------
														
 
															-        cls_preds = cls_preds.view(-1, self.num_classes)[valid_idxs]
														
 
															-        qfl_targets = (cls_targets[valid_idxs], assign_metrics[valid_idxs])
														
 
															-        loss_labels = self.loss_labels_qfl(cls_preds, qfl_targets, 2.0, num_fgs)
														
 
															-
														
 
															-        # -------------------- regression loss --------------------
														
 
															-        box_preds_pos = box_preds.view(-1, 4)[foreground_idxs]
														
 
															-        box_targets_pos = box_targets[foreground_idxs]
														
 
															-        box_weight = assign_metrics[foreground_idxs]
														
 
															-        loss_bboxes = self.loss_bboxes_xyxy(box_preds_pos, box_targets_pos, num_fgs, box_weight)
														
 
															-
														
 
															-        total_loss = loss_labels * self.weight_dict["loss_cls"] + \
														
 
															-                     loss_bboxes * self.weight_dict["loss_reg"]
														
 
															-        loss_dict = dict(
														
 
															-                loss_cls = loss_labels,
														
 
															-                loss_reg = loss_bboxes,
														
 
															-                losses   = total_loss,
														
 
															-        )
														
 
															-
														
 
															-        return loss_dict
														
 
															-    
														
 
															-    def forward(self, outputs, targets):
														
 
															-        """
														
 
															-            outputs['pred_cls']: (Tensor) [B, M, C]
														
 
															-            outputs['pred_reg']: (Tensor) [B, M, 4]
														
 
															-            outputs['pred_ctn']: (Tensor) [B, M, 1]
														
 
															-            outputs['strides']: (List) [8, 16, 32, ...] stride of the model output
														
 
															-            targets: (List) [dict{'boxes': [...], 
														
 
															-                                 'labels': [...], 
														
 
															-                                 'orig_size': ...}, ...]
														
 
															-        """
														
 
															-        if self.cfg.matcher == "fcos_matcher":
														
 
															-            return self.fcos_loss(outputs, targets)
														
 
															-        elif self.cfg.matcher == "simota":
														
 
															-            return self.ota_loss(outputs, targets)
														
 
															-        else:
														
 
															-            raise NotImplementedError
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    pass
														
--- a/yolo/models/fcos/matcher.py
+++ b/yolo/models/fcos/matcher.py
@@ -89,7 +89,6 @@ class FcosMatcher(object):
 
															         self.object_sizes_of_interest = object_sizes_of_interest
														
 
															         self.box_weightss = box_weights
														
 
															-
														
 
															     def get_deltas(self, anchors, boxes):
														
 
															         """
														
 
															         Get box regression transformation deltas (dl, dt, dr, db) that can be used
														
@@ -107,7 +106,6 @@ class FcosMatcher(object):
 
															                            dim=-1) * anchors.new_tensor(self.box_weightss)
														
 
															         return deltas
														
 
															-
														
 
															     @torch.no_grad()
														
 
															     def __call__(self, fpn_strides, anchors, targets):
														
 
															         """
														
@@ -216,163 +214,5 @@ class FcosMatcher(object):
 
															                 gt_anchors_deltas.append(gt_anchors_reg_deltas_i.float())
														
 
															                 gt_centerness.append(gt_centerness_i.float())
														
 
															-
														
 
															         # [B, M], [B, M, 4], [B, M]
														
 
															         return torch.stack(gt_classes), torch.stack(gt_anchors_deltas), torch.stack(gt_centerness)
														
 
															-
														
 
															-
														
 
															-class AlignedOTAMatcher(object):
														
 
															-    """
														
 
															-    This code referenced to https://github.com/open-mmlab/mmyolo/models/task_modules/assigners/batch_dsl_assigner.py
														
 
															-    """
														
 
															-    def __init__(self, num_classes, soft_center_radius=3.0, topk_candidates=13):
														
 
															-        self.num_classes = num_classes
														
 
															-        self.soft_center_radius = soft_center_radius
														
 
															-        self.topk_candidates = topk_candidates
														
 
															-
														
 
															-    @torch.no_grad()
														
 
															-    def __call__(self, 
														
 
															-                 fpn_strides, 
														
 
															-                 anchors, 
														
 
															-                 pred_cls, 
														
 
															-                 pred_box,
														
 
															-                 gt_labels,
														
 
															-                 gt_bboxes):
														
 
															-        # [M,]
														
 
															-        strides = torch.cat([torch.ones_like(anchor_i[:, 0]) * stride_i
														
 
															-                                for stride_i, anchor_i in zip(fpn_strides, anchors)], dim=-1)
														
 
															-        # List[F, M, 2] -> [M, 2]
														
 
															-        num_gt = len(gt_labels)
														
 
															-        anchors = torch.cat(anchors, dim=0)
														
 
															-
														
 
															-        # check gt
														
 
															-        if num_gt == 0 or gt_bboxes.max().item() == 0.:
														
 
															-            return {
														
 
															-                'assigned_labels': gt_labels.new_full(pred_cls[..., 0].shape,
														
 
															-                                                      self.num_classes,
														
 
															-                                                      dtype=torch.long),
														
 
															-                'assigned_bboxes': gt_bboxes.new_full(pred_box.shape, 0),
														
 
															-                'assign_metrics': gt_bboxes.new_full(pred_cls[..., 0].shape, 0)
														
 
															-            }
														
 
															-        
														
 
															-        # get inside points: [N, M]
														
 
															-        is_in_gt = self.find_inside_points(gt_bboxes, anchors)
														
 
															-        valid_mask = is_in_gt.sum(dim=0) > 0  # [M,]
														
 
															-
														
 
															-        # ----------------------------------- soft center prior -----------------------------------
														
 
															-        gt_center = (gt_bboxes[..., :2] + gt_bboxes[..., 2:]) / 2.0
														
 
															-        distance = (anchors.unsqueeze(0) - gt_center.unsqueeze(1)
														
 
															-                    ).pow(2).sum(-1).sqrt() / strides.unsqueeze(0)  # [N, M]
														
 
															-        distance = distance * valid_mask.unsqueeze(0)
														
 
															-        soft_center_prior = torch.pow(10, distance - self.soft_center_radius)
														
 
															-
														
 
															-        # ----------------------------------- regression cost -----------------------------------
														
 
															-        pair_wise_ious, _ = box_iou(gt_bboxes, pred_box)  # [N, M]
														
 
															-        pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8) * 3.0
														
 
															-
														
 
															-        # ----------------------------------- classification cost -----------------------------------
														
 
															-        ## select the predicted scores corresponded to the gt_labels
														
 
															-        pairwise_pred_scores = pred_cls.permute(1, 0)  # [M, C] -> [C, M]
														
 
															-        pairwise_pred_scores = pairwise_pred_scores[gt_labels.long(), :].float()   # [N, M]
														
 
															-        ## scale factor
														
 
															-        scale_factor = (pair_wise_ious - pairwise_pred_scores.sigmoid()).abs().pow(2.0)
														
 
															-        ## cls cost
														
 
															-        pair_wise_cls_loss = F.binary_cross_entropy_with_logits(
														
 
															-            pairwise_pred_scores, pair_wise_ious,
														
 
															-            reduction="none") * scale_factor # [N, M]
														
 
															-            
														
 
															-        del pairwise_pred_scores
														
 
															-
														
 
															-        ## foreground cost matrix
														
 
															-        cost_matrix = pair_wise_cls_loss + pair_wise_ious_loss + soft_center_prior
														
 
															-        max_pad_value = torch.ones_like(cost_matrix) * 1e9
														
 
															-        cost_matrix = torch.where(valid_mask[None].repeat(num_gt, 1),   # [N, M]
														
 
															-                                  cost_matrix, max_pad_value)
														
 
															-
														
 
															-        # ----------------------------------- dynamic label assignment -----------------------------------
														
 
															-        matched_pred_ious, matched_gt_inds, fg_mask_inboxes = self.dynamic_k_matching(
														
 
															-            cost_matrix, pair_wise_ious, num_gt)
														
 
															-        del pair_wise_cls_loss, cost_matrix, pair_wise_ious, pair_wise_ious_loss
														
 
															-
														
 
															-        # -----------------------------------process assigned labels -----------------------------------
														
 
															-        assigned_labels = gt_labels.new_full(pred_cls[..., 0].shape,
														
 
															-                                             self.num_classes)  # [M,]
														
 
															-        assigned_labels[fg_mask_inboxes] = gt_labels[matched_gt_inds].squeeze(-1)
														
 
															-        assigned_labels = assigned_labels.long()  # [M,]
														
 
															-
														
 
															-        assigned_bboxes = gt_bboxes.new_full(pred_box.shape, 0)        # [M, 4]
														
 
															-        assigned_bboxes[fg_mask_inboxes] = gt_bboxes[matched_gt_inds]  # [M, 4]
														
 
															-
														
 
															-        assign_metrics = gt_bboxes.new_full(pred_cls[..., 0].shape, 0) # [M,]
														
 
															-        assign_metrics[fg_mask_inboxes] = matched_pred_ious            # [M,]
														
 
															-
														
 
															-        assigned_dict = dict(
														
 
															-            assigned_labels=assigned_labels,
														
 
															-            assigned_bboxes=assigned_bboxes,
														
 
															-            assign_metrics=assign_metrics
														
 
															-            )
														
 
															-        
														
 
															-        return assigned_dict
														
 
															-
														
 
															-    def find_inside_points(self, gt_bboxes, anchors):
														
 
															-        """
														
 
															-            gt_bboxes: Tensor -> [N, 2]
														
 
															-            anchors:   Tensor -> [M, 2]
														
 
															-        """
														
 
															-        num_anchors = anchors.shape[0]
														
 
															-        num_gt = gt_bboxes.shape[0]
														
 
															-
														
 
															-        anchors_expand = anchors.unsqueeze(0).repeat(num_gt, 1, 1)           # [N, M, 2]
														
 
															-        gt_bboxes_expand = gt_bboxes.unsqueeze(1).repeat(1, num_anchors, 1)  # [N, M, 4]
														
 
															-
														
 
															-        # offset
														
 
															-        lt = anchors_expand - gt_bboxes_expand[..., :2]
														
 
															-        rb = gt_bboxes_expand[..., 2:] - anchors_expand
														
 
															-        bbox_deltas = torch.cat([lt, rb], dim=-1)
														
 
															-
														
 
															-        is_in_gts = bbox_deltas.min(dim=-1).values > 0
														
 
															-
														
 
															-        return is_in_gts
														
 
															-    
														
 
															-    def dynamic_k_matching(self, cost_matrix, pairwise_ious, num_gt):
														
 
															-        """Use IoU and matching cost to calculate the dynamic top-k positive
														
 
															-        targets.
														
 
															-
														
 
															-        Args:
														
 
															-            cost_matrix (Tensor): Cost matrix.
														
 
															-            pairwise_ious (Tensor): Pairwise iou matrix.
														
 
															-            num_gt (int): Number of gt.
														
 
															-            valid_mask (Tensor): Mask for valid bboxes.
														
 
															-        Returns:
														
 
															-            tuple: matched ious and gt indexes.
														
 
															-        """
														
 
															-        matching_matrix = torch.zeros_like(cost_matrix, dtype=torch.uint8)
														
 
															-        # select candidate topk ious for dynamic-k calculation
														
 
															-        candidate_topk = min(self.topk_candidates, pairwise_ious.size(1))
														
 
															-        topk_ious, _ = torch.topk(pairwise_ious, candidate_topk, dim=1)
														
 
															-        # calculate dynamic k for each gt
														
 
															-        dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1)
														
 
															-
														
 
															-        # sorting the batch cost matirx is faster than topk
														
 
															-        _, sorted_indices = torch.sort(cost_matrix, dim=1)
														
 
															-        for gt_idx in range(num_gt):
														
 
															-            topk_ids = sorted_indices[gt_idx, :dynamic_ks[gt_idx]]
														
 
															-            matching_matrix[gt_idx, :][topk_ids] = 1
														
 
															-
														
 
															-        del topk_ious, dynamic_ks, topk_ids
														
 
															-
														
 
															-        prior_match_gt_mask = matching_matrix.sum(0) > 1
														
 
															-        if prior_match_gt_mask.sum() > 0:
														
 
															-            cost_min, cost_argmin = torch.min(
														
 
															-                cost_matrix[:, prior_match_gt_mask], dim=0)
														
 
															-            matching_matrix[:, prior_match_gt_mask] *= 0
														
 
															-            matching_matrix[cost_argmin, prior_match_gt_mask] = 1
														
 
															-
														
 
															-        # get foreground mask inside box and center prior
														
 
															-        fg_mask_inboxes = matching_matrix.sum(0) > 0
														
 
															-        matched_pred_ious = (matching_matrix *
														
 
															-                             pairwise_ious).sum(0)[fg_mask_inboxes]
														
 
															-        matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0)
														
 
															-
														
 
															-        return matched_pred_ious, matched_gt_inds, fg_mask_inboxes
														
 
															-        
														
--- a/yolo/models/fcos/modules.py
+++ b/yolo/models/fcos/modules.py
@@ -4,145 +4,19 @@ from typing import List
 
															 # --------------------- Basic modules ---------------------
														
 
															-def get_conv2d(c1, c2, k, p, s, d, g, bias=False):
														
 
															-    conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias)
														
 
															-
														
 
															-    return conv
														
 
															-
														
 
															-def get_activation(act_type=None):
														
 
															-    if act_type == 'relu':
														
 
															-        return nn.ReLU(inplace=True)
														
 
															-    elif act_type == 'lrelu':
														
 
															-        return nn.LeakyReLU(0.1, inplace=True)
														
 
															-    elif act_type == 'mish':
														
 
															-        return nn.Mish(inplace=True)
														
 
															-    elif act_type == 'silu':
														
 
															-        return nn.SiLU(inplace=True)
														
 
															-    elif act_type is None:
														
 
															-        return nn.Identity()
														
 
															-    else:
														
 
															-        raise NotImplementedError
														
 
															-        
														
 
															-def get_norm(norm_type, dim):
														
 
															-    if norm_type == 'BN':
														
 
															-        return nn.BatchNorm2d(dim)
														
 
															-    elif norm_type == 'GN':
														
 
															-        return nn.GroupNorm(num_groups=32, num_channels=dim)
														
 
															-    elif norm_type is None:
														
 
															-        return nn.Identity()
														
 
															-    else:
														
 
															-        raise NotImplementedError
														
 
															-
														
 
															-class BasicConv(nn.Module):
														
 
															+class ConvModule(nn.Module):
														
 
															     def __init__(self, 
														
 
															-                 in_dim,                   # in channels
														
 
															-                 out_dim,                  # out channels 
														
 
															-                 kernel_size=1,            # kernel size 
														
 
															-                 padding=0,                # padding
														
 
															-                 stride=1,                 # padding
														
 
															-                 dilation=1,               # dilation
														
 
															-                 act_type  :str = 'lrelu', # activation
														
 
															-                 norm_type :str = 'BN',    # normalization
														
 
															-                 depthwise :bool = False
														
 
															+                 in_dim,         # in channels
														
 
															+                 out_dim,        # out channels 
														
 
															+                 kernel_size=1,  # kernel size 
														
 
															+                 padding=0,      # padding
														
 
															+                 stride=1,       # padding
														
 
															+                 dilation=1,     # dilation
														
 
															                 ):
														
 
															-        super(BasicConv, self).__init__()
														
 
															-        self.depthwise = depthwise
														
 
															-        use_bias = False if norm_type is not None else True
														
 
															-        if not depthwise:
														
 
															-            self.conv = get_conv2d(in_dim, out_dim, k=kernel_size, p=padding, s=stride, d=dilation, g=1, bias=use_bias)
														
 
															-            self.norm = get_norm(norm_type, out_dim)
														
 
															-        else:
														
 
															-            self.conv1 = get_conv2d(in_dim, in_dim, k=kernel_size, p=padding, s=stride, d=dilation, g=in_dim, bias=use_bias)
														
 
															-            self.norm1 = get_norm(norm_type, in_dim)
														
 
															-            self.conv2 = get_conv2d(in_dim, out_dim, k=1, p=0, s=1, d=1, g=1)
														
 
															-            self.norm2 = get_norm(norm_type, out_dim)
														
 
															-        self.act  = get_activation(act_type)
														
 
															+        super(ConvModule, self).__init__()
														
 
															+        self.conv = nn.Conv2d(in_dim, out_dim, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, bias=False)
														
 
															+        self.norm = nn.GroupNorm(num_groups=32, num_channels=out_dim)
														
 
															+        self.act  = nn.ReLU(inplace=True)
														
 
															     def forward(self, x):
														
 
															-        if not self.depthwise:
														
 
															-            return self.act(self.norm(self.conv(x)))
														
 
															-        else:
														
 
															-            # Depthwise conv
														
 
															-            x = self.norm1(self.conv1(x))
														
 
															-            # Pointwise conv
														
 
															-            x = self.act(self.norm2(self.conv2(x)))
														
 
															-            return x
														
 
															-
														
 
															-
														
 
															-# --------------------- ResNet modules ---------------------
														
 
															-def conv3x3(in_planes, out_planes, stride=1):
														
 
															-    """3x3 convolution with padding"""
														
 
															-    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
														
 
															-                     padding=1, bias=False)
														
 
															-
														
 
															-def conv1x1(in_planes, out_planes, stride=1):
														
 
															-    """1x1 convolution"""
														
 
															-    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
														
 
															-
														
 
															-class BasicBlock(nn.Module):
														
 
															-    expansion = 1
														
 
															-
														
 
															-    def __init__(self, inplanes, planes, stride=1, downsample=None):
														
 
															-        super(BasicBlock, self).__init__()
														
 
															-        self.conv1 = conv3x3(inplanes, planes, stride)
														
 
															-        self.bn1 = nn.BatchNorm2d(planes)
														
 
															-        self.relu = nn.ReLU(inplace=True)
														
 
															-        self.conv2 = conv3x3(planes, planes)
														
 
															-        self.bn2 = nn.BatchNorm2d(planes)
														
 
															-        self.downsample = downsample
														
 
															-        self.stride = stride
														
 
															-
														
 
															-    def forward(self, x):
														
 
															-        identity = x
														
 
															-
														
 
															-        out = self.conv1(x)
														
 
															-        out = self.bn1(out)
														
 
															-        out = self.relu(out)
														
 
															-
														
 
															-        out = self.conv2(out)
														
 
															-        out = self.bn2(out)
														
 
															-
														
 
															-        if self.downsample is not None:
														
 
															-            identity = self.downsample(x)
														
 
															-
														
 
															-        out += identity
														
 
															-        out = self.relu(out)
														
 
															-
														
 
															-        return out
														
 
															-
														
 
															-class Bottleneck(nn.Module):
														
 
															-    expansion = 4
														
 
															-
														
 
															-    def __init__(self, inplanes, planes, stride=1, downsample=None):
														
 
															-        super(Bottleneck, self).__init__()
														
 
															-        self.conv1 = conv1x1(inplanes, planes)
														
 
															-        self.bn1 = nn.BatchNorm2d(planes)
														
 
															-        self.conv2 = conv3x3(planes, planes, stride)
														
 
															-        self.bn2 = nn.BatchNorm2d(planes)
														
 
															-        self.conv3 = conv1x1(planes, planes * self.expansion)
														
 
															-        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
														
 
															-        self.relu = nn.ReLU(inplace=True)
														
 
															-        self.downsample = downsample
														
 
															-        self.stride = stride
														
 
															-
														
 
															-    def forward(self, x):
														
 
															-        identity = x
														
 
															-
														
 
															-        out = self.conv1(x)
														
 
															-        out = self.bn1(out)
														
 
															-        out = self.relu(out)
														
 
															-
														
 
															-        out = self.conv2(out)
														
 
															-        out = self.bn2(out)
														
 
															-        out = self.relu(out)
														
 
															-
														
 
															-        out = self.conv3(out)
														
 
															-        out = self.bn3(out)
														
 
															-
														
 
															-        if self.downsample is not None:
														
 
															-            identity = self.downsample(x)
														
 
															-
														
 
															-        out += identity
														
 
															-        out = self.relu(out)
														
 
															-
														
 
															-        return out
														
 
															+        return self.act(self.norm(self.conv(x)))
														
--- a/yolo/models/fcos/resnet.py
+++ b/yolo/models/fcos/resnet.py
@@ -2,15 +2,10 @@ import torch
 
															 import torch.nn as nn
														
 
															 import torch.utils.model_zoo as model_zoo
														
 
															-try:
														
 
															-    from .modules import conv1x1, BasicBlock, Bottleneck
														
 
															-except:
														
 
															-    from  modules import conv1x1, BasicBlock, Bottleneck
														
 
															 __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
														
 
															            'resnet152']
														
 
															-
														
 
															 model_urls = {
														
 
															     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
														
 
															     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
														
@@ -20,9 +15,87 @@ model_urls = {
 
															 }
														
 
															+# --------------------- ResNet modules ---------------------
														
 
															+def conv3x3(in_planes, out_planes, stride=1):
														
 
															+    """3x3 convolution with padding"""
														
 
															+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
														
 
															+                     padding=1, bias=False)
														
 
															+
														
 
															+def conv1x1(in_planes, out_planes, stride=1):
														
 
															+    """1x1 convolution"""
														
 
															+    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
														
 
															+
														
 
															+class BasicBlock(nn.Module):
														
 
															+    expansion = 1
														
 
															+
														
 
															+    def __init__(self, inplanes, planes, stride=1, downsample=None):
														
 
															+        super(BasicBlock, self).__init__()
														
 
															+        self.conv1 = conv3x3(inplanes, planes, stride)
														
 
															+        self.bn1 = nn.BatchNorm2d(planes)
														
 
															+        self.relu = nn.ReLU(inplace=True)
														
 
															+        self.conv2 = conv3x3(planes, planes)
														
 
															+        self.bn2 = nn.BatchNorm2d(planes)
														
 
															+        self.downsample = downsample
														
 
															+        self.stride = stride
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        identity = x
														
 
															+
														
 
															+        out = self.conv1(x)
														
 
															+        out = self.bn1(out)
														
 
															+        out = self.relu(out)
														
 
															+
														
 
															+        out = self.conv2(out)
														
 
															+        out = self.bn2(out)
														
 
															+
														
 
															+        if self.downsample is not None:
														
 
															+            identity = self.downsample(x)
														
 
															+
														
 
															+        out += identity
														
 
															+        out = self.relu(out)
														
 
															+
														
 
															+        return out
														
 
															+
														
 
															+class Bottleneck(nn.Module):
														
 
															+    expansion = 4
														
 
															+
														
 
															+    def __init__(self, inplanes, planes, stride=1, downsample=None):
														
 
															+        super(Bottleneck, self).__init__()
														
 
															+        self.conv1 = conv1x1(inplanes, planes)
														
 
															+        self.bn1 = nn.BatchNorm2d(planes)
														
 
															+        self.conv2 = conv3x3(planes, planes, stride)
														
 
															+        self.bn2 = nn.BatchNorm2d(planes)
														
 
															+        self.conv3 = conv1x1(planes, planes * self.expansion)
														
 
															+        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
														
 
															+        self.relu = nn.ReLU(inplace=True)
														
 
															+        self.downsample = downsample
														
 
															+        self.stride = stride
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        identity = x
														
 
															+
														
 
															+        out = self.conv1(x)
														
 
															+        out = self.bn1(out)
														
 
															+        out = self.relu(out)
														
 
															+
														
 
															+        out = self.conv2(out)
														
 
															+        out = self.bn2(out)
														
 
															+        out = self.relu(out)
														
 
															+
														
 
															+        out = self.conv3(out)
														
 
															+        out = self.bn3(out)
														
 
															+
														
 
															+        if self.downsample is not None:
														
 
															+            identity = self.downsample(x)
														
 
															+
														
 
															+        out += identity
														
 
															+        out = self.relu(out)
														
 
															+
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															 # --------------------- ResNet -----------------------
														
 
															 class ResNet(nn.Module):
														
 
															-
														
 
															     def __init__(self, block, layers, zero_init_residual=False):
														
 
															         super(ResNet, self).__init__()
														
 
															         self.inplanes = 64
														
@@ -86,27 +159,29 @@ class ResNet(nn.Module):
 
															         c4 = self.layer3(c3)   # [B, C, H/16, W/16]
														
 
															         c5 = self.layer4(c4)   # [B, C, H/32, W/32]
														
 
															-        return c5
														
 
															+        return [c3, c4, c5]
														
 
															-
														
 
															-# --------------------- Functions -----------------------
														
 
															 def build_resnet(model_name="resnet18", pretrained=False):
														
 
															     if model_name == 'resnet18':
														
 
															         model = resnet18(pretrained)
														
 
															-        feat_dim = 512
														
 
															+        feat_dims = [128, 256, 512]
														
 
															+
														
 
															     elif model_name == 'resnet34':
														
 
															         model = resnet34(pretrained)
														
 
															-        feat_dim = 512
														
 
															+        feat_dims = [128, 256, 512]
														
 
															+
														
 
															     elif model_name == 'resnet50':
														
 
															         model = resnet50(pretrained)
														
 
															-        feat_dim = 2048
														
 
															+        feat_dims = [512, 1024, 2048]
														
 
															+
														
 
															     elif model_name == 'resnet101':
														
 
															-        model = resnet34(pretrained)
														
 
															-        feat_dim = 2048
														
 
															+        model = resnet101(pretrained)
														
 
															+        feat_dims = [512, 1024, 2048]
														
 
															+
														
 
															     else:
														
 
															         raise NotImplementedError("Unknown resnet: {}".format(model_name))
														
 
															-    return model, feat_dim
														
 
															+    return model, feat_dims
														
 
															 def resnet18(pretrained=False, **kwargs):
														
 
															     """Constructs a ResNet-18 model.
														
--- a/yolo/models/yolof/build.py
+++ b/yolo/models/yolof/build.py
@@ -0,0 +1,16 @@
 
															+from .loss import SetCriterion
														
 
															+from .yolof import Yolof
														
 
															+
														
 
															+
														
 
															+# build object detector
														
 
															+def build_yolof(cfg, is_val=False):
														
 
															+    # -------------- Build YOLO --------------
														
 
															+    model = Yolof(cfg, is_val)
														
 
															+  
														
 
															+    # -------------- Build criterion --------------
														
 
															+    criterion = None
														
 
															+    if is_val:
														
 
															+        # build criterion for training
														
 
															+        criterion = SetCriterion(cfg)
														
 
															+        
														
 
															+    return model, criterion
														
--- a/yolo/models/yolof/loss.py
+++ b/yolo/models/yolof/loss.py
@@ -8,24 +8,27 @@ from utils.distributed_utils import get_world_size, is_dist_avail_and_initialize
 
															 from .matcher import UniformMatcher
														
 
															-class SetCriterion(nn.Module):
														
 
															+class SetCriterion(object):
														
 
															     """
														
 
															         This code referenced to https://github.com/megvii-model/YOLOF/blob/main/playground/detection/coco/yolof/yolof_base/yolof.py
														
 
															     """
														
 
															     def __init__(self, cfg):
														
 
															-        super().__init__()
														
 
															         # ------------- Basic parameters -------------
														
 
															         self.cfg = cfg
														
 
															         self.num_classes = cfg.num_classes
														
 
															+
														
 
															         # ------------- Focal loss -------------
														
 
															         self.alpha = cfg.focal_loss_alpha
														
 
															         self.gamma = cfg.focal_loss_gamma
														
 
															+
														
 
															         # ------------- Loss weight -------------
														
 
															-        self.weight_dict = {'loss_cls': cfg.loss_cls_weight,
														
 
															-                            'loss_reg': cfg.loss_reg_weight}
														
 
															+        self.weight_dict = {'loss_cls': cfg.loss_cls,
														
 
															+                            'loss_reg': cfg.loss_reg}
														
 
															+        
														
 
															         # ------------- Matcher -------------
														
 
															-        self.matcher_cfg = cfg.matcher_hpy
														
 
															-        self.matcher = UniformMatcher(self.matcher_cfg['topk_candidates'])
														
 
															+        self.ignore_thresh = cfg.ignore_thresh
														
 
															+        self.match_iou_weight = cfg.match_iou_thresh
														
 
															+        self.matcher = UniformMatcher(cfg.match_topk_candidates)
														
 
															     def loss_labels(self, pred_cls, tgt_cls, num_boxes):
														
 
															         """
														
@@ -49,7 +52,7 @@ class SetCriterion(nn.Module):
 
															         return loss_reg.sum() / num_boxes
														
 
															-    def forward(self, outputs, targets):
														
 
															+    def __call__(self, outputs, targets):
														
 
															         """
														
 
															             outputs['pred_cls']: (Tensor) [B, M, C]
														
 
															             outputs['pred_box']: (Tensor) [B, M, 4]
														
@@ -61,29 +64,28 @@ class SetCriterion(nn.Module):
 
															         pred_box = outputs['pred_box']
														
 
															         pred_cls = outputs['pred_cls'].reshape(-1, self.num_classes)
														
 
															         anchor_boxes = outputs['anchors']
														
 
															-        masks = ~outputs['mask']
														
 
															         device = pred_box.device
														
 
															-        B = len(targets)
														
 
															+        bs = len(targets)
														
 
															         # -------------------- Label assignment --------------------
														
 
															         indices = self.matcher(pred_box, anchor_boxes, targets)
														
 
															         # [M, 4] -> [1, M, 4] -> [B, M, 4]
														
 
															         anchor_boxes = box_cxcywh_to_xyxy(anchor_boxes)
														
 
															-        anchor_boxes = anchor_boxes[None].repeat(B, 1, 1)
														
 
															+        anchor_boxes = anchor_boxes[None].repeat(bs, 1, 1)
														
 
															         ious = []
														
 
															         pos_ious = []
														
 
															-        for i in range(B):
														
 
															+        for i in range(bs):
														
 
															             src_idx, tgt_idx = indices[i]
														
 
															             # iou between predbox and tgt box
														
 
															-            iou, _ = box_iou(pred_box[i, ...], (targets[i]['boxes']).clone())
														
 
															+            iou, _ = box_iou(pred_box[i, ...], (targets[i]['boxes']).clone().to(device))
														
 
															             if iou.numel() == 0:
														
 
															                 max_iou = iou.new_full((iou.size(0),), 0)
														
 
															             else:
														
 
															                 max_iou = iou.max(dim=1)[0]
														
 
															             # iou between anchorbox and tgt box
														
 
															-            a_iou, _ = box_iou(anchor_boxes[i], (targets[i]['boxes']).clone())
														
 
															+            a_iou, _ = box_iou(anchor_boxes[i], (targets[i]['boxes']).clone().to(device))
														
 
															             if a_iou.numel() == 0:
														
 
															                 pos_iou = a_iou.new_full((0,), 0)
														
 
															             else:
														
@@ -92,42 +94,41 @@ class SetCriterion(nn.Module):
 
															             pos_ious.append(pos_iou)
														
 
															         ious = torch.cat(ious)
														
 
															-        ignore_idx = ious > self.matcher_cfg['ignore_thresh']
														
 
															+        ignore_idx = ious > self.ignore_thresh
														
 
															         pos_ious = torch.cat(pos_ious)
														
 
															-        pos_ignore_idx = pos_ious < self.matcher_cfg['iou_thresh']
														
 
															+        pos_ignore_idx = pos_ious < self.match_iou_weight
														
 
															         src_idx = torch.cat(
														
 
															             [src + idx * anchor_boxes[0].shape[0] for idx, (src, _) in
														
 
															              enumerate(indices)])
														
 
															         # [BM,]
														
 
															         gt_cls = torch.full(pred_cls.shape[:1],
														
 
															-                                self.num_classes,
														
 
															-                                dtype=torch.int64,
														
 
															-                                device=device)
														
 
															+                            self.num_classes,
														
 
															+                            dtype=torch.int64,
														
 
															+                            device=device)
														
 
															         gt_cls[ignore_idx] = -1
														
 
															         tgt_cls_o = torch.cat([t['labels'][J] for t, (_, J) in zip(targets, indices)])
														
 
															         tgt_cls_o[pos_ignore_idx] = -1
														
 
															         gt_cls[src_idx] = tgt_cls_o.to(device)
														
 
															-        foreground_idxs = (gt_cls >= 0) & (gt_cls != self.num_classes)
														
 
															-        num_foreground = foreground_idxs.sum()
														
 
															+        fg_mask = (gt_cls >= 0) & (gt_cls != self.num_classes)
														
 
															+        num_fgs = fg_mask.sum()
														
 
															         if is_dist_avail_and_initialized():
														
 
															-            torch.distributed.all_reduce(num_foreground)
														
 
															-        num_foreground = torch.clamp(num_foreground / get_world_size(), min=1).item()
														
 
															+            torch.distributed.all_reduce(num_fgs)
														
 
															+        num_fgs = torch.clamp(num_fgs / get_world_size(), min=1).item()
														
 
															         # -------------------- Classification loss --------------------
														
 
															         gt_cls_target = torch.zeros_like(pred_cls)
														
 
															-        gt_cls_target[foreground_idxs, gt_cls[foreground_idxs]] = 1
														
 
															-        valid_idxs = (gt_cls >= 0) & masks
														
 
															-        loss_labels = self.loss_labels(pred_cls[valid_idxs], gt_cls_target[valid_idxs], num_foreground)
														
 
															+        gt_cls_target[fg_mask, gt_cls[fg_mask]] = 1
														
 
															+        loss_labels = self.loss_labels(pred_cls, gt_cls_target, num_fgs)
														
 
															         # -------------------- Regression loss --------------------
														
 
															         tgt_boxes = torch.cat([t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0).to(device)
														
 
															         tgt_boxes = tgt_boxes[~pos_ignore_idx]
														
 
															         matched_pred_box = pred_box.reshape(-1, 4)[src_idx[~pos_ignore_idx.cpu()]]
														
 
															-        loss_bboxes = self.loss_bboxes(matched_pred_box, tgt_boxes, num_foreground)
														
 
															+        loss_bboxes = self.loss_bboxes(matched_pred_box, tgt_boxes, num_fgs)
														
 
															         total_loss = loss_labels * self.weight_dict["loss_cls"] + \
														
 
															                      loss_bboxes * self.weight_dict["loss_reg"]
														
--- a/yolo/models/yolof/matcher.py
+++ b/yolo/models/yolof/matcher.py
@@ -30,7 +30,7 @@ class UniformMatcher(nn.Module):
 
															         anchor_boxes = anchor_boxes.flatten(0, 1)
														
 
															         # Also concat the target boxes
														
 
															-        tgt_bbox = torch.cat([v['boxes'] for v in targets])
														
 
															+        tgt_bbox = torch.cat([v['boxes'] for v in targets]).to(out_bbox.device)
														
 
															         # Compute the L1 cost between boxes
														
 
															         # Note that we use anchors and predict boxes both
														
--- a/yolo/models/yolof/modules.py
+++ b/yolo/models/yolof/modules.py
@@ -4,145 +4,20 @@ from typing import List
 
															 # --------------------- Basic modules ---------------------
														
 
															-def get_conv2d(c1, c2, k, p, s, d, g, bias=False):
														
 
															-    conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias)
														
 
															-
														
 
															-    return conv
														
 
															-
														
 
															-def get_activation(act_type=None):
														
 
															-    if act_type == 'relu':
														
 
															-        return nn.ReLU(inplace=True)
														
 
															-    elif act_type == 'lrelu':
														
 
															-        return nn.LeakyReLU(0.1, inplace=True)
														
 
															-    elif act_type == 'mish':
														
 
															-        return nn.Mish(inplace=True)
														
 
															-    elif act_type == 'silu':
														
 
															-        return nn.SiLU(inplace=True)
														
 
															-    elif act_type is None:
														
 
															-        return nn.Identity()
														
 
															-    else:
														
 
															-        raise NotImplementedError
														
 
															-        
														
 
															-def get_norm(norm_type, dim):
														
 
															-    if norm_type == 'BN':
														
 
															-        return nn.BatchNorm2d(dim)
														
 
															-    elif norm_type == 'GN':
														
 
															-        return nn.GroupNorm(num_groups=32, num_channels=dim)
														
 
															-    elif norm_type is None:
														
 
															-        return nn.Identity()
														
 
															-    else:
														
 
															-        raise NotImplementedError
														
 
															-
														
 
															-class BasicConv(nn.Module):
														
 
															+class ConvModule(nn.Module):
														
 
															     def __init__(self, 
														
 
															-                 in_dim,                   # in channels
														
 
															-                 out_dim,                  # out channels 
														
 
															-                 kernel_size=1,            # kernel size 
														
 
															-                 padding=0,                # padding
														
 
															-                 stride=1,                 # padding
														
 
															-                 dilation=1,               # dilation
														
 
															-                 act_type  :str = 'lrelu', # activation
														
 
															-                 norm_type :str = 'BN',    # normalization
														
 
															-                 depthwise :bool = False
														
 
															+                 in_dim: int,           # in channels
														
 
															+                 out_dim: int,          # out channels 
														
 
															+                 kernel_size: int = 1,  # kernel size 
														
 
															+                 padding: int = 0,      # padding
														
 
															+                 stride: int = 1,       # padding
														
 
															+                 dilation: int = 1,     # dilation
														
 
															+                 use_act: bool = False,
														
 
															                 ):
														
 
															-        super(BasicConv, self).__init__()
														
 
															-        self.depthwise = depthwise
														
 
															-        use_bias = False if norm_type is not None else True
														
 
															-        if not depthwise:
														
 
															-            self.conv = get_conv2d(in_dim, out_dim, k=kernel_size, p=padding, s=stride, d=dilation, g=1, bias=use_bias)
														
 
															-            self.norm = get_norm(norm_type, out_dim)
														
 
															-        else:
														
 
															-            self.conv1 = get_conv2d(in_dim, in_dim, k=kernel_size, p=padding, s=stride, d=dilation, g=in_dim, bias=use_bias)
														
 
															-            self.norm1 = get_norm(norm_type, in_dim)
														
 
															-            self.conv2 = get_conv2d(in_dim, out_dim, k=1, p=0, s=1, d=1, g=1)
														
 
															-            self.norm2 = get_norm(norm_type, out_dim)
														
 
															-        self.act  = get_activation(act_type)
														
 
															+        super(ConvModule, self).__init__()
														
 
															+        self.conv = nn.Conv2d(in_dim, out_dim, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, bias=False)
														
 
															+        self.norm = nn.BatchNorm2d(out_dim)
														
 
															+        self.act  = nn.ReLU(inplace=True) if use_act else nn.Identity()
														
 
															     def forward(self, x):
														
 
															-        if not self.depthwise:
														
 
															-            return self.act(self.norm(self.conv(x)))
														
 
															-        else:
														
 
															-            # Depthwise conv
														
 
															-            x = self.norm1(self.conv1(x))
														
 
															-            # Pointwise conv
														
 
															-            x = self.act(self.norm2(self.conv2(x)))
														
 
															-            return x
														
 
															-
														
 
															-
														
 
															-# --------------------- ResNet modules ---------------------
														
 
															-def conv3x3(in_planes, out_planes, stride=1):
														
 
															-    """3x3 convolution with padding"""
														
 
															-    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
														
 
															-                     padding=1, bias=False)
														
 
															-
														
 
															-def conv1x1(in_planes, out_planes, stride=1):
														
 
															-    """1x1 convolution"""
														
 
															-    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
														
 
															-
														
 
															-class BasicBlock(nn.Module):
														
 
															-    expansion = 1
														
 
															-
														
 
															-    def __init__(self, inplanes, planes, stride=1, downsample=None):
														
 
															-        super(BasicBlock, self).__init__()
														
 
															-        self.conv1 = conv3x3(inplanes, planes, stride)
														
 
															-        self.bn1 = nn.BatchNorm2d(planes)
														
 
															-        self.relu = nn.ReLU(inplace=True)
														
 
															-        self.conv2 = conv3x3(planes, planes)
														
 
															-        self.bn2 = nn.BatchNorm2d(planes)
														
 
															-        self.downsample = downsample
														
 
															-        self.stride = stride
														
 
															-
														
 
															-    def forward(self, x):
														
 
															-        identity = x
														
 
															-
														
 
															-        out = self.conv1(x)
														
 
															-        out = self.bn1(out)
														
 
															-        out = self.relu(out)
														
 
															-
														
 
															-        out = self.conv2(out)
														
 
															-        out = self.bn2(out)
														
 
															-
														
 
															-        if self.downsample is not None:
														
 
															-            identity = self.downsample(x)
														
 
															-
														
 
															-        out += identity
														
 
															-        out = self.relu(out)
														
 
															-
														
 
															-        return out
														
 
															-
														
 
															-class Bottleneck(nn.Module):
														
 
															-    expansion = 4
														
 
															-
														
 
															-    def __init__(self, inplanes, planes, stride=1, downsample=None):
														
 
															-        super(Bottleneck, self).__init__()
														
 
															-        self.conv1 = conv1x1(inplanes, planes)
														
 
															-        self.bn1 = nn.BatchNorm2d(planes)
														
 
															-        self.conv2 = conv3x3(planes, planes, stride)
														
 
															-        self.bn2 = nn.BatchNorm2d(planes)
														
 
															-        self.conv3 = conv1x1(planes, planes * self.expansion)
														
 
															-        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
														
 
															-        self.relu = nn.ReLU(inplace=True)
														
 
															-        self.downsample = downsample
														
 
															-        self.stride = stride
														
 
															-
														
 
															-    def forward(self, x):
														
 
															-        identity = x
														
 
															-
														
 
															-        out = self.conv1(x)
														
 
															-        out = self.bn1(out)
														
 
															-        out = self.relu(out)
														
 
															-
														
 
															-        out = self.conv2(out)
														
 
															-        out = self.bn2(out)
														
 
															-        out = self.relu(out)
														
 
															-
														
 
															-        out = self.conv3(out)
														
 
															-        out = self.bn3(out)
														
 
															-
														
 
															-        if self.downsample is not None:
														
 
															-            identity = self.downsample(x)
														
 
															-
														
 
															-        out += identity
														
 
															-        out = self.relu(out)
														
 
															-
														
 
															-        return out
														
 
															+        return self.act(self.norm(self.conv(x)))
														
--- a/yolo/models/yolof/resnet.py
+++ b/yolo/models/yolof/resnet.py
@@ -2,15 +2,10 @@ import torch
 
															 import torch.nn as nn
														
 
															 import torch.utils.model_zoo as model_zoo
														
 
															-try:
														
 
															-    from .modules import conv1x1, BasicBlock, Bottleneck
														
 
															-except:
														
 
															-    from  modules import conv1x1, BasicBlock, Bottleneck
														
 
															 __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
														
 
															            'resnet152']
														
 
															-
														
 
															 model_urls = {
														
 
															     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
														
 
															     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
														
@@ -20,9 +15,87 @@ model_urls = {
 
															 }
														
 
															+# --------------------- ResNet modules ---------------------
														
 
															+def conv3x3(in_planes, out_planes, stride=1):
														
 
															+    """3x3 convolution with padding"""
														
 
															+    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
														
 
															+                     padding=1, bias=False)
														
 
															+
														
 
															+def conv1x1(in_planes, out_planes, stride=1):
														
 
															+    """1x1 convolution"""
														
 
															+    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
														
 
															+
														
 
															+class BasicBlock(nn.Module):
														
 
															+    expansion = 1
														
 
															+
														
 
															+    def __init__(self, inplanes, planes, stride=1, downsample=None):
														
 
															+        super(BasicBlock, self).__init__()
														
 
															+        self.conv1 = conv3x3(inplanes, planes, stride)
														
 
															+        self.bn1 = nn.BatchNorm2d(planes)
														
 
															+        self.relu = nn.ReLU(inplace=True)
														
 
															+        self.conv2 = conv3x3(planes, planes)
														
 
															+        self.bn2 = nn.BatchNorm2d(planes)
														
 
															+        self.downsample = downsample
														
 
															+        self.stride = stride
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        identity = x
														
 
															+
														
 
															+        out = self.conv1(x)
														
 
															+        out = self.bn1(out)
														
 
															+        out = self.relu(out)
														
 
															+
														
 
															+        out = self.conv2(out)
														
 
															+        out = self.bn2(out)
														
 
															+
														
 
															+        if self.downsample is not None:
														
 
															+            identity = self.downsample(x)
														
 
															+
														
 
															+        out += identity
														
 
															+        out = self.relu(out)
														
 
															+
														
 
															+        return out
														
 
															+
														
 
															+class Bottleneck(nn.Module):
														
 
															+    expansion = 4
														
 
															+
														
 
															+    def __init__(self, inplanes, planes, stride=1, downsample=None):
														
 
															+        super(Bottleneck, self).__init__()
														
 
															+        self.conv1 = conv1x1(inplanes, planes)
														
 
															+        self.bn1 = nn.BatchNorm2d(planes)
														
 
															+        self.conv2 = conv3x3(planes, planes, stride)
														
 
															+        self.bn2 = nn.BatchNorm2d(planes)
														
 
															+        self.conv3 = conv1x1(planes, planes * self.expansion)
														
 
															+        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
														
 
															+        self.relu = nn.ReLU(inplace=True)
														
 
															+        self.downsample = downsample
														
 
															+        self.stride = stride
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        identity = x
														
 
															+
														
 
															+        out = self.conv1(x)
														
 
															+        out = self.bn1(out)
														
 
															+        out = self.relu(out)
														
 
															+
														
 
															+        out = self.conv2(out)
														
 
															+        out = self.bn2(out)
														
 
															+        out = self.relu(out)
														
 
															+
														
 
															+        out = self.conv3(out)
														
 
															+        out = self.bn3(out)
														
 
															+
														
 
															+        if self.downsample is not None:
														
 
															+            identity = self.downsample(x)
														
 
															+
														
 
															+        out += identity
														
 
															+        out = self.relu(out)
														
 
															+
														
 
															+        return out
														
 
															+
														
 
															+
														
 
															 # --------------------- ResNet -----------------------
														
 
															 class ResNet(nn.Module):
														
 
															-
														
 
															     def __init__(self, block, layers, zero_init_residual=False):
														
 
															         super(ResNet, self).__init__()
														
 
															         self.inplanes = 64
														
@@ -88,21 +161,23 @@ class ResNet(nn.Module):
 
															         return c5
														
 
															-
														
 
															-# --------------------- Functions -----------------------
														
 
															 def build_resnet(model_name="resnet18", pretrained=False):
														
 
															     if model_name == 'resnet18':
														
 
															         model = resnet18(pretrained)
														
 
															         feat_dim = 512
														
 
															+
														
 
															     elif model_name == 'resnet34':
														
 
															         model = resnet34(pretrained)
														
 
															         feat_dim = 512
														
 
															+
														
 
															     elif model_name == 'resnet50':
														
 
															         model = resnet50(pretrained)
														
 
															         feat_dim = 2048
														
 
															+
														
 
															     elif model_name == 'resnet101':
														
 
															-        model = resnet34(pretrained)
														
 
															+        model = resnet101(pretrained)
														
 
															         feat_dim = 2048
														
 
															+
														
 
															     else:
														
 
															         raise NotImplementedError("Unknown resnet: {}".format(model_name))
														
@@ -184,4 +259,4 @@ if __name__=='__main__':
 
															     flops, params = profile(model, inputs=(x, ), verbose=False)
														
 
															     print('==============================')
														
 
															     print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
														
 
															-    print('Params : {:.2f} M'.format(params / 1e6))    
														
 
															+    print('Params : {:.2f} M'.format(params / 1e6))
														
--- a/yolo/models/yolof/yolof.py
+++ b/yolo/models/yolof/yolof.py
@@ -0,0 +1,91 @@
 
															+import torch
														
 
															+import torch.nn as nn
														
 
															+
														
 
															+# --------------- Model components ---------------
														
 
															+from .yolof_backbone import YolofBackbone
														
 
															+from .yolof_encoder  import DilatedEncoder
														
 
															+from .yolof_decoder  import YolofHead
														
 
															+
														
 
															+# --------------- External components ---------------
														
 
															+from utils.misc import multiclass_nms
														
 
															+
														
 
															+
														
 
															+# ------------------------ You Only Look One-level Feature ------------------------
														
 
															+class Yolof(nn.Module):
														
 
															+    def __init__(self, cfg, is_val: bool = False):
														
 
															+        super(Yolof, self).__init__()
														
 
															+        # ---------------------- Basic setting ----------------------
														
 
															+        self.cfg = cfg
														
 
															+        self.num_classes = cfg.num_classes
														
 
															+        ## Post-process parameters
														
 
															+        self.topk_candidates  = cfg.val_topk        if is_val else cfg.test_topk
														
 
															+        self.conf_thresh      = cfg.val_conf_thresh if is_val else cfg.test_conf_thresh
														
 
															+        self.nms_thresh       = cfg.val_nms_thresh  if is_val else cfg.test_nms_thresh
														
 
															+        self.no_multi_labels  = False if is_val else True
														
 
															+
														
 
															+        # ---------------------- Network Parameters ----------------------
														
 
															+        self.backbone = YolofBackbone(cfg)
														
 
															+        self.encoder  = DilatedEncoder(cfg, self.backbone.feat_dim, cfg.head_dim)
														
 
															+        self.decoder  = YolofHead(cfg, self.encoder.out_dim, cfg.head_dim)
														
 
															+
														
 
															+    def post_process(self, cls_pred, box_pred):
														
 
															+        """
														
 
															+        Input:
														
 
															+            cls_pred: (Tensor) [[H x W x KA, C]
														
 
															+            box_pred: (Tensor)  [H x W x KA, 4]
														
 
															+        """
														
 
															+        cls_pred = cls_pred[0]
														
 
															+        box_pred = box_pred[0]
														
 
															+        
														
 
															+        # (H x W x KA x C,)
														
 
															+        scores_i = cls_pred.sigmoid().flatten()
														
 
															+
														
 
															+        # Keep top k top scoring indices only.
														
 
															+        num_topk = min(self.topk_candidates, box_pred.size(0))
														
 
															+
														
 
															+        # torch.sort is actually faster than .topk (at least on GPUs)
														
 
															+        predicted_prob, topk_idxs = scores_i.sort(descending=True)
														
 
															+        topk_scores = predicted_prob[:num_topk]
														
 
															+        topk_idxs = topk_idxs[:num_topk]
														
 
															+
														
 
															+        # filter out the proposals with low confidence score
														
 
															+        keep_idxs = topk_scores > self.conf_thresh
														
 
															+        topk_idxs = topk_idxs[keep_idxs]
														
 
															+
														
 
															+        # final scores
														
 
															+        scores = topk_scores[keep_idxs]
														
 
															+        # final labels
														
 
															+        labels = topk_idxs % self.num_classes
														
 
															+        # final bboxes
														
 
															+        anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
														
 
															+        bboxes = box_pred[anchor_idxs]
														
 
															+
														
 
															+        # to cpu & numpy
														
 
															+        scores = scores.cpu().numpy()
														
 
															+        labels = labels.cpu().numpy()
														
 
															+        bboxes = bboxes.cpu().numpy()
														
 
															+
														
 
															+        # nms
														
 
															+        scores, labels, bboxes = multiclass_nms(
														
 
															+            scores, labels, bboxes, self.nms_thresh, self.num_classes)
														
 
															+
														
 
															+        return bboxes, scores, labels
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        x = self.backbone(x)
														
 
															+        x = self.encoder(x)
														
 
															+        outputs = self.decoder(x)
														
 
															+
														
 
															+        if not self.training:
														
 
															+            # ---------------- PostProcess ----------------
														
 
															+            cls_pred = outputs["pred_cls"]
														
 
															+            box_pred = outputs["pred_box"]
														
 
															+            bboxes, scores, labels = self.post_process(cls_pred, box_pred)
														
 
															+
														
 
															+            outputs = {
														
 
															+                'scores': scores,
														
 
															+                'labels': labels,
														
 
															+                'bboxes': bboxes
														
 
															+            }
														
 
															+
														
 
															+        return outputs 
														
--- a/yolo/models/yolof/yolof_backbone.py
+++ b/yolo/models/yolof/yolof_backbone.py
@@ -0,0 +1,50 @@
 
															+import torch
														
 
															+import torch.nn as nn
														
 
															+
														
 
															+try:
														
 
															+    from .resnet import build_resnet
														
 
															+except:
														
 
															+    from  resnet import build_resnet
														
 
															+
														
 
															+
														
 
															+# --------------------- Yolov1's Backbone -----------------------
														
 
															+class YolofBackbone(nn.Module):
														
 
															+    def __init__(self, cfg):
														
 
															+        super().__init__()
														
 
															+        self.backbone, self.feat_dim = build_resnet(cfg.backbone, cfg.use_pretrained)
														
 
															+
														
 
															+    def forward(self, x):
														
 
															+        pyramid_feats = self.backbone(x)
														
 
															+
														
 
															+        return pyramid_feats # [C3, C4, C5]
														
 
															+
														
 
															+
														
 
															+if __name__=='__main__':
														
 
															+    from thop import profile
														
 
															+
														
 
															+    # YOLOv1 configuration
														
 
															+    class YolofBaseConfig(object):
														
 
															+        def __init__(self) -> None:
														
 
															+            # ---------------- Model config ----------------
														
 
															+            self.out_stride = 32
														
 
															+            ## Backbone
														
 
															+            self.backbone = 'resnet18'
														
 
															+            self.use_pretrained = True
														
 
															+    cfg = YolofBaseConfig()
														
 
															+
														
 
															+    # Build backbone
														
 
															+    model = YolofBackbone(cfg)
														
 
															+
														
 
															+    # Randomly generate a input data
														
 
															+    x = torch.randn(2, 3, 640, 640)
														
 
															+
														
 
															+    # Inference
														
 
															+    output = model(x)
														
 
															+    print(' - the shape of input :  ', x.shape)
														
 
															+    print(' - the shape of output : ', output.shape)
														
 
															+
														
 
															+    x = torch.randn(1, 3, 640, 640)
														
 
															+    flops, params = profile(model, inputs=(x, ), verbose=False)
														
 
															+    print('============== FLOPs & Params ================')
														
 
															+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
														
 
															+    print(' - Params : {:.2f} M'.format(params / 1e6))
														
--- a/yolo/models/yolof/yolof_decoder.py
+++ b/yolo/models/yolof/yolof_decoder.py
@@ -2,62 +2,48 @@ import math
 
															 import torch
														
 
															 import torch.nn as nn
														
 
															-from .modules import BasicConv
														
 
															+try:
														
 
															+    from .modules import ConvModule
														
 
															+except:
														
 
															+    from  modules import ConvModule
														
 
															 class YolofHead(nn.Module):
														
 
															-    def __init__(self, cfg, in_dim, out_dim,):
														
 
															+    def __init__(self, cfg, in_dim: int, out_dim: int,):
														
 
															         super().__init__()
														
 
															-        self.fmp_size = None
														
 
															-        self.ctr_clamp = cfg.center_clamp
														
 
															+        self.ctr_clamp = 32
														
 
															         self.DEFAULT_EXP_CLAMP = math.log(1e8)
														
 
															         self.DEFAULT_SCALE_CLAMP = math.log(1000.0 / 16)
														
 
															         # ------------------ Basic parameters -------------------
														
 
															         self.cfg = cfg
														
 
															         self.in_dim = in_dim
														
 
															-        self.stride       = cfg.out_stride
														
 
															+        self.out_stride   = cfg.out_stride
														
 
															         self.num_classes  = cfg.num_classes
														
 
															         self.num_cls_head = cfg.num_cls_head
														
 
															         self.num_reg_head = cfg.num_reg_head
														
 
															-        self.act_type     = cfg.head_act
														
 
															-        self.norm_type    = cfg.head_norm
														
 
															         # Anchor config
														
 
															         self.anchor_size = torch.as_tensor(cfg.anchor_size)
														
 
															         self.num_anchors = len(cfg.anchor_size)
														
 
															         # ------------------ Network parameters -------------------
														
 
															-        ## cls head
														
 
															+        ## classification head
														
 
															         cls_heads = []
														
 
															         self.cls_head_dim = out_dim
														
 
															         for i in range(self.num_cls_head):
														
 
															             if i == 0:
														
 
															-                cls_heads.append(
														
 
															-                    BasicConv(in_dim, self.cls_head_dim,
														
 
															-                              kernel_size=3, padding=1, stride=1, 
														
 
															-                              act_type=self.act_type, norm_type=self.norm_type)
														
 
															-                              )
														
 
															+                cls_heads.append(ConvModule(in_dim, self.cls_head_dim, kernel_size=3, padding=1, stride=1))
														
 
															             else:
														
 
															-                cls_heads.append(
														
 
															-                    BasicConv(self.cls_head_dim, self.cls_head_dim,
														
 
															-                              kernel_size=3, padding=1, stride=1, 
														
 
															-                              act_type=self.act_type, norm_type=self.norm_type)
														
 
															-                              )
														
 
															-        ## reg head
														
 
															+                cls_heads.append(ConvModule(self.cls_head_dim, self.cls_head_dim, kernel_size=3, padding=1, stride=1))
														
 
															+
														
 
															+        ## bbox regression head
														
 
															         reg_heads = []
														
 
															         self.reg_head_dim = out_dim
														
 
															         for i in range(self.num_reg_head):
														
 
															             if i == 0:
														
 
															-                reg_heads.append(
														
 
															-                    BasicConv(in_dim, self.reg_head_dim,
														
 
															-                              kernel_size=3, padding=1, stride=1, 
														
 
															-                              act_type=self.act_type, norm_type=self.norm_type)
														
 
															-                              )
														
 
															+                reg_heads.append(ConvModule(in_dim, self.reg_head_dim, kernel_size=3, padding=1, stride=1))
														
 
															             else:
														
 
															-                reg_heads.append(
														
 
															-                    BasicConv(self.reg_head_dim, self.reg_head_dim,
														
 
															-                              kernel_size=3, padding=1, stride=1, 
														
 
															-                              act_type=self.act_type, norm_type=self.norm_type)
														
 
															-                              )
														
 
															+                reg_heads.append(ConvModule(self.reg_head_dim, self.reg_head_dim, kernel_size=3, padding=1, stride=1))
														
 
															+
														
 
															         self.cls_heads = nn.Sequential(*cls_heads)
														
 
															         self.reg_heads = nn.Sequential(*reg_heads)
														
@@ -86,30 +72,25 @@ class YolofHead(nn.Module):
 
															         """fmp_size: list -> [H, W] \n
														
 
															            stride: int -> output stride
														
 
															         """
														
 
															-        # check anchor boxes
														
 
															-        if self.fmp_size is not None and self.fmp_size == fmp_size:
														
 
															-            return self.anchor_boxes
														
 
															-        else:
														
 
															-            # generate grid cells
														
 
															-            fmp_h, fmp_w = fmp_size
														
 
															-            anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
														
 
															-            # [H, W, 2] -> [HW, 2]
														
 
															-            anchor_xy = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2) + 0.5
														
 
															-            # [HW, 2] -> [HW, 1, 2] -> [HW, KA, 2] 
														
 
															-            anchor_xy = anchor_xy[:, None, :].repeat(1, self.num_anchors, 1)
														
 
															-            anchor_xy *= self.stride
														
 
															-
														
 
															-            # [KA, 2] -> [1, KA, 2] -> [HW, KA, 2]
														
 
															-            anchor_wh = self.anchor_size[None, :, :].repeat(fmp_h*fmp_w, 1, 1)
														
 
															-
														
 
															-            # [HW, KA, 4] -> [M, 4]
														
 
															-            anchor_boxes = torch.cat([anchor_xy, anchor_wh], dim=-1)
														
 
															-            anchor_boxes = anchor_boxes.view(-1, 4)
														
 
															-
														
 
															-            self.anchor_boxes = anchor_boxes
														
 
															-            self.fmp_size = fmp_size
														
 
															-
														
 
															-            return anchor_boxes
														
 
															+        # generate grid cells
														
 
															+        fmp_h, fmp_w = fmp_size
														
 
															+        anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
														
 
															+
														
 
															+        # anchor points: [H, W, 2] -> [HW, 2]
														
 
															+        anchor_xy = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2) + 0.5
														
 
															+
														
 
															+        # [HW, 2] -> [HW, 1, 2] -> [HW, KA, 2] 
														
 
															+        anchor_xy = anchor_xy[:, None, :].repeat(1, self.num_anchors, 1)
														
 
															+        anchor_xy *= self.out_stride       # [KA, 2] -> [1, KA, 2] -> [HW, KA, 2]
														
 
															+
														
 
															+        # anchor boxes: [KA, 2] -> [HW, KA, 2]
														
 
															+        anchor_wh = self.anchor_size[None, :, :].repeat(fmp_h*fmp_w, 1, 1)
														
 
															+
														
 
															+        # [HW, KA, 4] -> [M, 4], M = H*W*KA
														
 
															+        anchor_boxes = torch.cat([anchor_xy, anchor_wh], dim=-1)
														
 
															+        anchor_boxes = anchor_boxes.view(-1, 4)
														
 
															+
														
 
															+        return anchor_boxes
														
 
															     def decode_boxes(self, anchor_boxes, pred_reg):
														
 
															         """
														
@@ -135,7 +116,7 @@ class YolofHead(nn.Module):
 
															         return pred_box
														
 
															-    def forward(self, x, mask=None):
														
 
															+    def forward(self, x):
														
 
															         # ------------------- Decoupled head -------------------
														
 
															         cls_feats = self.cls_heads(x)
														
 
															         reg_feats = self.reg_heads(x)
														
@@ -167,19 +148,11 @@ class YolofHead(nn.Module):
 
															         reg_pred = reg_pred.view(B, -1, 4)
														
 
															         ## Decode bbox
														
 
															         box_pred = self.decode_boxes(anchor_boxes[None], reg_pred)  # [B, M, 4]
														
 
															-        ## adjust mask
														
 
															-        if mask is not None:
														
 
															-            # [B, H, W]
														
 
															-            mask = torch.nn.functional.interpolate(mask[None].float(), size=fmp_size).bool()[0]
														
 
															-            # [B, H, W] -> [B, HW]
														
 
															-            mask = mask.flatten(1)
														
 
															-            # [B, HW] -> [B, HW, KA] -> [BM,], M= HW x KA
														
 
															-            mask = mask[..., None].repeat(1, 1, self.num_anchors).flatten()
														
 
															         outputs = {"pred_cls": normalized_cls_pred,
														
 
															                    "pred_reg": reg_pred,
														
 
															                    "pred_box": box_pred,
														
 
															                    "anchors": anchor_boxes,
														
 
															-                   "mask": mask}
														
 
															+                   }
														
 
															         return outputs 
														
--- a/yolo/models/yolof/yolof_encoder.py
+++ b/yolo/models/yolof/yolof_encoder.py
@@ -1,23 +1,26 @@
 
															+import torch
														
 
															 import torch.nn as nn
														
 
															-from utils import weight_init
														
 
															-from .modules import BasicConv
														
 
															+try:
														
 
															+    from .modules import ConvModule
														
 
															+except:
														
 
															+    from  modules import ConvModule
														
 
															 # BottleNeck
														
 
															 class Bottleneck(nn.Module):
														
 
															-    def __init__(self, in_dim, dilation, expand_ratio, act_type='relu', norm_type='BN'):
														
 
															+    def __init__(self, in_dim: int, dilation: int = 1, expansion: float = 0.5):
														
 
															         super(Bottleneck, self).__init__()
														
 
															         # ------------------ Basic parameters -------------------
														
 
															         self.in_dim = in_dim
														
 
															         self.dilation = dilation
														
 
															-        self.expand_ratio = expand_ratio
														
 
															-        inter_dim = round(in_dim * expand_ratio)
														
 
															+        self.expansion = expansion
														
 
															+        inter_dim = round(in_dim * expansion)
														
 
															         # ------------------ Network parameters -------------------
														
 
															         self.branch = nn.Sequential(
														
 
															-            BasicConv(in_dim, inter_dim, kernel_size=1, act_type=act_type, norm_type=norm_type),
														
 
															-            BasicConv(inter_dim, inter_dim, kernel_size=3, padding=dilation, dilation=dilation, act_type=act_type, norm_type=norm_type),
														
 
															-            BasicConv(inter_dim, in_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
														
 
															+            ConvModule(in_dim, inter_dim, kernel_size=1),
														
 
															+            ConvModule(inter_dim, inter_dim, kernel_size=3, padding=dilation, dilation=dilation),
														
 
															+            ConvModule(inter_dim, in_dim, kernel_size=1)
														
 
															         )
														
 
															     def forward(self, x):
														
@@ -32,41 +35,56 @@ class DilatedEncoder(nn.Module):
 
															         self.out_dim = out_dim
														
 
															         self.expand_ratio = cfg.neck_expand_ratio
														
 
															         self.dilations    = cfg.neck_dilations
														
 
															-        self.act_type     = cfg.neck_act
														
 
															-        self.norm_type    = cfg.neck_norm
														
 
															         # ------------------ Network parameters -------------------
														
 
															         ## proj layer
														
 
															         self.projector = nn.Sequential(
														
 
															-            BasicConv(in_dim, out_dim, kernel_size=1, act_type=None, norm_type=self.norm_type),
														
 
															-            BasicConv(out_dim, out_dim, kernel_size=3, padding=1, act_type=None, norm_type=self.norm_type)
														
 
															+            ConvModule(in_dim,  out_dim, kernel_size=1, use_act=False),
														
 
															+            ConvModule(out_dim, out_dim, kernel_size=3, padding=1, use_act=False)
														
 
															         )
														
 
															         ## encoder layers
														
 
															         self.encoders = nn.Sequential(
														
 
															-            *[Bottleneck(out_dim, d, self.expand_ratio, self.act_type, self.norm_type) for d in self.dilations])
														
 
															-
														
 
															-        self._init_weight()
														
 
															-
														
 
															-    def _init_weight(self):
														
 
															-        for m in self.projector:
														
 
															-            if isinstance(m, nn.Conv2d):
														
 
															-                weight_init.c2_xavier_fill(m)
														
 
															-                weight_init.c2_xavier_fill(m)
														
 
															-            if isinstance(m, (nn.GroupNorm, nn.BatchNorm2d, nn.SyncBatchNorm)):
														
 
															-                nn.init.constant_(m.weight, 1)
														
 
															-                nn.init.constant_(m.bias, 0)
														
 
															-
														
 
															-        for m in self.encoders.modules():
														
 
															-            if isinstance(m, nn.Conv2d):
														
 
															-                nn.init.normal_(m.weight, mean=0, std=0.01)
														
 
															-                if hasattr(m, 'bias') and m.bias is not None:
														
 
															-                    nn.init.constant_(m.bias, 0)
														
 
															-
														
 
															-            if isinstance(m, (nn.GroupNorm, nn.BatchNorm2d, nn.SyncBatchNorm)):
														
 
															-                nn.init.constant_(m.weight, 1)
														
 
															-                nn.init.constant_(m.bias, 0)
														
 
															+            *[Bottleneck(in_dim = out_dim,
														
 
															+                         dilation = d,
														
 
															+                         expansion = self.expand_ratio,
														
 
															+                         ) for d in self.dilations])
														
 
															     def forward(self, x):
														
 
															         x = self.projector(x)
														
 
															         x = self.encoders(x)
														
 
															         return x
														
 
															+
														
 
															+
														
 
															+if __name__=='__main__':
														
 
															+    from thop import profile
														
 
															+
														
 
															+    # YOLOv1 configuration
														
 
															+    class YolofBaseConfig(object):
														
 
															+        def __init__(self) -> None:
														
 
															+            # ---------------- Model config ----------------
														
 
															+            self.out_stride = 32
														
 
															+            ## Backbone
														
 
															+            self.backbone = 'resnet18'
														
 
															+            self.use_pretrained = True
														
 
															+
														
 
															+            self.neck_expand_ratio = 0.25
														
 
															+            self.neck_dilations = [2, 4, 6, 8]
														
 
															+
														
 
															+    cfg = YolofBaseConfig()
														
 
															+
														
 
															+    # Randomly generate a input data
														
 
															+    x = torch.randn(2, 512, 20, 20)
														
 
															+
														
 
															+    # Build backbone
														
 
															+    model = DilatedEncoder(cfg, in_dim=512, out_dim=512)
														
 
															+
														
 
															+    # Inference
														
 
															+    output = model(x)
														
 
															+    print(' - the shape of input :  ', x.shape)
														
 
															+    print(' - the shape of output : ', output.shape)
														
 
															+
														
 
															+    x = torch.randn(1, 512, 20, 20)
														
 
															+    flops, params = profile(model, inputs=(x, ), verbose=False)
														
 
															+    print('============== FLOPs & Params ================')
														
 
															+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
														
 
															+    print(' - Params : {:.2f} M'.format(params / 1e6))
														
--- a/yolo/utils/misc.py
+++ b/yolo/utils/misc.py
@@ -204,7 +204,7 @@ class CollateFunc(object):
 
															 # ---------------------------- For Loss ----------------------------
														
 
															 ## FocalLoss
														
 
															-def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2):
														
 
															+def sigmoid_focal_loss(inputs, targets, alpha: float = 0.25, gamma: float = 2):
														
 
															     """
														
 
															     Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.
														
 
															     Args:
														
@@ -229,7 +229,7 @@ def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: f
 
															         alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
														
 
															         loss = alpha_t * loss
														
 
															-    return loss.mean(1).sum() / num_boxes
														
 
															+    return loss
														
 
															 ## Variable FocalLoss
														
 
															 def varifocal_loss_with_logits(pred_logits,