@@ -11,7 +11,6 @@ from torch.nn.parallel import DistributedDataParallel as DDP
 
 from utils import distributed_utils
 from utils.misc import compute_flops, collate_fn
-from utils.misc import get_param_dict, ModelEMA
 from utils.optimizer import build_optimizer
 from utils.lr_scheduler import build_wp_lr_scheduler, build_lr_scheduler
 
@@ -36,12 +35,8 @@ def parse_args():
     # Model
     parser.add_argument('-m', '--model', default='yolof_r18_c5_1x',
                         help='build object detector')
-    parser.add_argument('-p', '--pretrained', default=None, type=str,
-                        help='load pretrained weight')
     parser.add_argument('-r', '--resume', default=None, type=str,
                         help='keep training')
-    parser.add_argument('--ema', default=None, type=str,
-                        help='use Model EMA trick.')
     # Dataset
     parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/COCO/',
                         help='data root')
@@ -53,8 +48,6 @@ def parse_args():
     parser.add_argument('--num_workers', default=2, type=int,
                         help='Number of workers used in dataloading')
     # Epoch
-    parser.add_argument('--eval_epoch', default=2, type=int,
-                        help='interval between evaluations')
     parser.add_argument('--save_folder', default='weights/', type=str,
                         help='path to save weight')
     parser.add_argument('--eval_first', action="store_true", default=False,
@@ -68,8 +61,6 @@ def parse_args():
                         help='number of distributed processes')
     parser.add_argument('--sybn', action='store_true', default=False,
                         help='use sybn.')
-    parser.add_argument('--find_unused_parameters', action='store_true', default=False,
-                        help='set find_unused_parameters as True.')
     # Debug setting
     parser.add_argument('--debug', action='store_true', default=False,
                         help='debug codes.')
@@ -93,7 +84,6 @@ def main():
     path_to_save = os.path.join(args.save_folder, args.dataset, args.model)
     os.makedirs(path_to_save, exist_ok=True)
 
-
     # ---------------------------- Build DDP ----------------------------
     distributed_utils.init_distributed_mode(args)
     print("git:\n {}\n".format(distributed_utils.get_sha()))
@@ -101,7 +91,6 @@ def main():
     print('World size: {}'.format(world_size))
     per_gpu_batch = args.batch_size // world_size
 
-
     # ---------------------------- Build CUDA ----------------------------
     if args.cuda and torch.cuda.is_available():
         print('use cuda')
@@ -109,28 +98,23 @@ def main():
     else:
         device = torch.device("cpu")
 
-
     # ---------------------------- Fix random seed ----------------------------
     fix_random_seed(args)
 
-
    # ---------------------------- Build config ----------------------------
     cfg = build_config(args)
     print('Model config: ', cfg)
 
-
     # ---------------------------- Build Dataset ----------------------------
     transforms = build_transform(cfg, is_train=True)
-    dataset, dataset_info = build_dataset(args, transforms, is_train=True)
-
+    dataset = build_dataset(args, cfg, transforms, is_train=True)
 
     # ---------------------------- Build Dataloader ----------------------------
     train_loader = build_dataloader(args, dataset, per_gpu_batch, collate_fn, is_train=True)
 
-
     # ---------------------------- Build model ----------------------------
     ## Build model
-    model, criterion = build_model(args, cfg, dataset_info['num_classes'], is_val=True)
+    model, criterion = build_model(args, cfg, cfg.num_classes, is_val=True)
     model.to(device)
     model_without_ddp = model
     ## Calcute Params & GFLOPs
@@ -139,51 +123,35 @@ def main():
         model_copy.trainable = False
         model_copy.eval()
         compute_flops(model=model_copy,
-                      min_size=cfg['test_min_size'],
-                      max_size=cfg['test_max_size'],
+                      min_size=cfg.test_min_size,
+                      max_size=cfg.test_max_size,
                       device=device)
         del model_copy
     if args.distributed:
         dist.barrier()
 
-
     # ---------------------------- Build Optimizer ----------------------------
-    cfg['base_lr'] = cfg['base_lr'] * args.batch_size
-    param_dicts = None
-    if 'param_dict_type' in cfg.keys() and cfg['param_dict_type'] != 'default':
-        print("- Param dict type: {}".format(cfg['param_dict_type']))
-        param_dicts = get_param_dict(model_without_ddp, cfg)
-    optimizer, start_epoch = build_optimizer(cfg, model_without_ddp, param_dicts, args.resume)
-
+    cfg.grad_accumulate = max(16 // args.batch_size, 1)
+    cfg.base_lr = cfg.per_image_lr * args.batch_size * cfg.grad_accumulate
+    optimizer, start_epoch = build_optimizer(cfg, model_without_ddp, args.resume)
 
     # ---------------------------- Build LR Scheduler ----------------------------
-    wp_lr_scheduler = build_wp_lr_scheduler(cfg, cfg['base_lr'])
+    wp_lr_scheduler = build_wp_lr_scheduler(cfg, cfg.base_lr)
     lr_scheduler = build_lr_scheduler(cfg, optimizer, args.resume)
 
-
-    # ---------------------------- Build Model EMA ----------------------------
-    model_ema = None
-    if 'use_ema' in cfg.keys() and cfg['use_ema']:
-        print("Build Model EMA for {}".format(args.model))
-        model_ema = ModelEMA(cfg, model, start_epoch * len(train_loader))
-
-
     # ---------------------------- Build DDP model ----------------------------
     if args.distributed:
-        model = DDP(model, device_ids=[args.gpu], find_unused_parameters=args.find_unused_parameters)
+        model = DDP(model, device_ids=[args.gpu])
         model_without_ddp = model.module
 
-
     # ---------------------------- Build Evaluator ----------------------------
     evaluator = build_evluator(args, cfg, device)
 
-
     # ----------------------- Eval before training -----------------------
     if args.eval_first and distributed_utils.is_main_process():
         evaluator.evaluate(model_without_ddp)
         return
 
-
     # ----------------------- Training -----------------------
     print("Start training")
     best_map = -1.
@@ -201,8 +169,6 @@ def main():
                         epoch,
                         args.vis_tgt,
                         wp_lr_scheduler,
-                        dataset_info['class_labels'],
-                        model_ema=model_ema,
                         debug=args.debug)
 
         # LR Scheduler
@@ -210,23 +176,25 @@ def main():
 
         # Evaluate
         if distributed_utils.is_main_process():
-            model_eval = model_ema.ema if model_ema is not None else model_without_ddp
+            model_eval = model_without_ddp
+            to_save = False
             if (epoch % args.eval_epoch) == 0 or (epoch == cfg['max_epoch'] - 1):
                 if evaluator is None:
-                    cur_map = 0.
+                    to_save = True
                 else:
                     evaluator.evaluate(model_eval)
-                    cur_map = evaluator.map
-                    # Save model
-                    if cur_map > best_map:
-                        # update best-map
-                        best_map = cur_map
+                    # Save model
+                    if evaluator.map >= best_map:
+                        best_map = evaluator.map
+                        to_save = True
+
+                if to_save:
                         # save model
-                        print('Saving state, epoch:', epoch + 1)
+                        print('Saving state, epoch:', epoch)
                         torch.save({'model': model_eval.state_dict(),
                                     'optimizer': optimizer.state_dict(),
                                     'lr_scheduler': lr_scheduler.state_dict(),
-                                    'mAP': round(cur_map*100, 1),
+                                    'mAP': round(best_map*100, 1),
                                     'epoch': epoch,
                                     'args': args},
                                     os.path.join(path_to_save, '{}_best.pth'.format(args.model)))
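
Note: the sketch below is not part of the patch. It only illustrates how the reworked "Build Optimizer" hunk derives the effective learning rate from a per-image rate, the batch size, and the gradient-accumulation factor; per_image_lr and the numeric values are hypothetical, not taken from the repository config.

# Illustrative sketch only, mirroring the two added lines in the optimizer hunk.
def effective_lr(per_image_lr: float, batch_size: int) -> float:
    # Accumulate gradients so the effective batch size is at least 16.
    grad_accumulate = max(16 // batch_size, 1)
    # Linear scaling rule: lr grows with the effective (accumulated) batch size.
    return per_image_lr * batch_size * grad_accumulate

# batch_size=8 -> grad_accumulate=2 -> effective batch of 16, so lr returns to the 16-image baseline.
print(effective_lr(per_image_lr=0.001 / 16, batch_size=8))  # 0.001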