@@ -1,203 +0,0 @@
-import math
-import torch
-import torch.nn as nn
-
-from ..basic.conv import ConvModule
-
-
-class RetinaNetHead(nn.Module):
-    def __init__(self, cfg, in_dim, out_dim, num_classes, num_cls_head=1, num_reg_head=1, act_type='relu', norm_type='BN'):
-        super().__init__()
-        self.fmp_size = None
-        self.DEFAULT_SCALE_CLAMP = math.log(1000.0 / 16)
-        # ------------------ Basic parameters -------------------
-        self.cfg = cfg
-        self.in_dim = in_dim
-        self.num_classes = num_classes
-        self.num_cls_head = num_cls_head
-        self.num_reg_head = num_reg_head
-        self.act_type = act_type
-        self.norm_type = norm_type
-        self.stride = cfg['out_stride']
-        # ------------------ Anchor parameters -------------------
-        self.anchor_size = self.get_anchor_sizes(cfg)  # [S, KA, 2]
-        self.num_anchors = self.anchor_size.shape[1]
-
-        # ------------------ Network parameters -------------------
-        ## cls head
-        cls_heads = []
-        self.cls_head_dim = out_dim
-        for i in range(self.num_cls_head):
-            if i == 0:
-                cls_heads.append(
-                    ConvModule(in_dim, self.cls_head_dim, k=3, p=1, s=1,
-                               act_type=self.act_type,
-                               norm_type=self.norm_type)
-                )
-            else:
-                cls_heads.append(
-                    ConvModule(self.cls_head_dim, self.cls_head_dim, k=3, p=1, s=1,
-                               act_type=self.act_type,
-                               norm_type=self.norm_type)
-                )
-        ## reg head
-        reg_heads = []
-        self.reg_head_dim = out_dim
-        for i in range(self.num_reg_head):
-            if i == 0:
-                reg_heads.append(
-                    ConvModule(in_dim, self.reg_head_dim, k=3, p=1, s=1,
-                               act_type=self.act_type,
-                               norm_type=self.norm_type)
-                )
-            else:
-                reg_heads.append(
-                    ConvModule(self.reg_head_dim, self.reg_head_dim, k=3, p=1, s=1,
-                               act_type=self.act_type,
-                               norm_type=self.norm_type)
-                )
-        self.cls_heads = nn.Sequential(*cls_heads)
-        self.reg_heads = nn.Sequential(*reg_heads)
-
-        ## pred layers
-        self.cls_pred = nn.Conv2d(self.cls_head_dim, num_classes * self.num_anchors, kernel_size=3, padding=1)
-        self.reg_pred = nn.Conv2d(self.reg_head_dim, 4 * self.num_anchors, kernel_size=3, padding=1)
-
-        # init bias
-        self._init_layers()
-
-    def _init_layers(self):
-        for module in [self.cls_heads, self.reg_heads, self.cls_pred, self.reg_pred]:
-            for layer in module.modules():
-                if isinstance(layer, nn.Conv2d):
-                    torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
-                    torch.nn.init.constant_(layer.bias, 0)
-                if isinstance(layer, nn.GroupNorm):
-                    torch.nn.init.constant_(layer.weight, 1)
-                    torch.nn.init.constant_(layer.bias, 0)
-        # init the bias of cls pred so each anchor starts with a foreground score of ~init_prob
-        init_prob = 0.01
-        bias_value = -math.log((1. - init_prob) / init_prob)
-        torch.nn.init.constant_(self.cls_pred.bias, bias_value)
-
-    def get_anchor_sizes(self, cfg):
-        basic_anchor_size = cfg['anchor_config']['basic_size']
-        anchor_aspect_ratio = cfg['anchor_config']['aspect_ratio']
-        anchor_area_scale = cfg['anchor_config']['area_scale']
-
-        num_scales = len(basic_anchor_size)
-        num_anchors = len(anchor_aspect_ratio) * len(anchor_area_scale)
-        anchor_sizes = []
-        for size in basic_anchor_size:
-            for ar in anchor_aspect_ratio:
-                for s in anchor_area_scale:
-                    ah, aw = size
-                    area = ah * aw * s
-                    anchor_sizes.append(
-                        [math.sqrt(ar * area),
-                         math.sqrt(area / ar)]
-                    )
-        # [S * KA, 2] -> [S, KA, 2]
-        anchor_sizes = torch.as_tensor(anchor_sizes).view(num_scales, num_anchors, 2)
-
-        return anchor_sizes
-
-    def get_anchors(self, level, fmp_size):
-        """
-            fmp_size: (List) [H, W]
-        """
-        # generate grid cells
-        fmp_h, fmp_w = fmp_size
-        # [KA, 2]
-        anchor_size = self.anchor_size[level]
-
-        anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
-        # [H, W, 2] -> [HW, 2]
-        anchor_xy = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2) + 0.5
-        # [HW, 2] -> [HW, 1, 2] -> [HW, KA, 2]
-        anchor_xy = anchor_xy[:, None, :].repeat(1, self.num_anchors, 1)
-        anchor_xy *= self.stride[level]
-
-        # [KA, 2] -> [1, KA, 2] -> [HW, KA, 2]
-        anchor_wh = anchor_size[None, :, :].repeat(fmp_h*fmp_w, 1, 1)
-
-        # [HW, KA, 4] -> [M, 4], M = HW x KA
-        anchor_boxes = torch.cat([anchor_xy, anchor_wh], dim=-1)
-        anchor_boxes = anchor_boxes.view(-1, 4)
-
-        return anchor_boxes
-
-    def decode_boxes(self, anchor_boxes, pred_reg):
-        """
-            anchor_boxes: (Tensor) [1, M, 4] or [M, 4]
-            pred_reg:     (Tensor) [B, M, 4] or [M, 4]
-        """
-        # x = x_anchor + dx * w_anchor
-        # y = y_anchor + dy * h_anchor
-        pred_ctr_offset = pred_reg[..., :2] * anchor_boxes[..., 2:]
-        pred_ctr_xy = anchor_boxes[..., :2] + pred_ctr_offset
-
-        # w = w_anchor * exp(tw)
-        # h = h_anchor * exp(th)
-        pred_dwdh = pred_reg[..., 2:]
-        pred_dwdh = torch.clamp(pred_dwdh, max=self.DEFAULT_SCALE_CLAMP)
-        pred_wh = anchor_boxes[..., 2:] * pred_dwdh.exp()
-
-        # convert [x, y, w, h] -> [x1, y1, x2, y2]
-        pred_x1y1 = pred_ctr_xy - 0.5 * pred_wh
-        pred_x2y2 = pred_ctr_xy + 0.5 * pred_wh
-        pred_box = torch.cat([pred_x1y1, pred_x2y2], dim=-1)
-
-        return pred_box
-
-    def forward(self, pyramid_feats, mask=None):
-        all_masks = []
-        all_anchors = []
-        all_cls_preds = []
-        all_reg_preds = []
-        all_box_preds = []
-        for level, feat in enumerate(pyramid_feats):
-            # ------------------- Decoupled head -------------------
-            cls_feat = self.cls_heads(feat)
-            reg_feat = self.reg_heads(feat)
-
-            # ------------------- Generate anchor box -------------------
-            B, _, H, W = cls_feat.size()
-            fmp_size = [H, W]
-            anchor_boxes = self.get_anchors(level, fmp_size)  # [M, 4]
-            anchor_boxes = anchor_boxes.to(cls_feat.device)
-
-            # ------------------- Predict -------------------
-            cls_pred = self.cls_pred(cls_feat)
-            reg_pred = self.reg_pred(reg_feat)
-
-            # ------------------- Process preds -------------------
-            ## [B, C, H, W] -> [B, H, W, C] -> [B, M, C]
-            cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_classes)
-            reg_pred = reg_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 4)
-            ## Decode bbox
-            box_pred = self.decode_boxes(anchor_boxes, reg_pred)
-            ## Adjust mask
-            if mask is not None:
-                # [B, H, W]
-                mask_i = torch.nn.functional.interpolate(mask[None].float(), size=[H, W]).bool()[0]
-                # [B, H, W] -> [B, HW]
-                mask_i = mask_i.flatten(1)
-                # [B, HW] -> [B, HW, KA] -> [B, M], M = HW x KA
-                mask_i = mask_i[..., None].repeat(1, 1, self.num_anchors).flatten(1)
-
-                all_masks.append(mask_i)
-
-            all_anchors.append(anchor_boxes)
-            all_cls_preds.append(cls_pred)
-            all_reg_preds.append(reg_pred)
-            all_box_preds.append(box_pred)
-
-        outputs = {"pred_cls": all_cls_preds,  # List [B, M, C]
-                   "pred_reg": all_reg_preds,  # List [B, M, 4]
-                   "pred_box": all_box_preds,  # List [B, M, 4]
-                   "anchors": all_anchors,     # List [M, 4]
-                   "strides": self.stride,
-                   "mask": all_masks}          # List [B, M]
-
-        return outputs
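For reference, `get_anchor_sizes` expects `cfg['anchor_config']` to provide `basic_size` (one `[h, w]` pair per pyramid level), `aspect_ratio`, and `area_scale`; each generated pair is built so that all anchors at a level share the same area per area scale, with `ar` taken as the width/height ratio. A minimal standalone sketch of that construction, using made-up config values rather than the repo's actual ones:

```python
import math
import torch

# Hypothetical anchor config: 2 pyramid levels, 3 aspect ratios, 1 area scale.
anchor_config = {'basic_size': [[32, 32], [64, 64]],
                 'aspect_ratio': [0.5, 1.0, 2.0],
                 'area_scale': [1.0]}

sizes = []
for (ah, aw) in anchor_config['basic_size']:
    for ar in anchor_config['aspect_ratio']:
        for s in anchor_config['area_scale']:
            area = ah * aw * s
            sizes.append([math.sqrt(ar * area), math.sqrt(area / ar)])

num_anchors = len(anchor_config['aspect_ratio']) * len(anchor_config['area_scale'])
# [S * KA, 2] -> [S, KA, 2], mirroring the reshape in get_anchor_sizes
sizes = torch.as_tensor(sizes).view(len(anchor_config['basic_size']), num_anchors, 2)
print(sizes[0])                          # one [w, h] pair per aspect ratio
print(sizes[0, :, 0] * sizes[0, :, 1])   # tensor([1024., 1024., 1024.]) -- constant area
```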
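The forward path pairs `get_anchors` (grid centers offset by 0.5 cells and scaled by the level stride, concatenated with anchor sizes) with `decode_boxes` (center offsets scaled by anchor size, plus clamped log-scale factors). A quick sanity check, reimplementing the same math standalone with a hypothetical stride and two hypothetical anchor sizes; zero regression should reproduce each anchor in corner form:

```python
import math
import torch

stride = 8                                   # hypothetical stride of one pyramid level
anchor_wh = torch.as_tensor([[32.0, 32.0],   # [KA, 2], hypothetical anchor sizes
                             [45.0, 23.0]])
num_anchors = anchor_wh.shape[0]

# Grid of anchor centers for a tiny 2x3 feature map, mirroring get_anchors()
fmp_h, fmp_w = 2, 3
ys, xs = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
xy = (torch.stack([xs, ys], dim=-1).float().view(-1, 2) + 0.5) * stride
xy = xy[:, None, :].repeat(1, num_anchors, 1)            # [HW, KA, 2]
wh = anchor_wh[None, :, :].repeat(fmp_h * fmp_w, 1, 1)   # [HW, KA, 2]
anchors = torch.cat([xy, wh], dim=-1).view(-1, 4)        # [M, 4], M = HW x KA

# Zero regression decodes each anchor back to itself, as [x1, y1, x2, y2]
pred_reg = torch.zeros_like(anchors)
ctr = anchors[..., :2] + pred_reg[..., :2] * anchors[..., 2:]
wh_d = anchors[..., 2:] * pred_reg[..., 2:].clamp(max=math.log(1000.0 / 16)).exp()
boxes = torch.cat([ctr - 0.5 * wh_d, ctr + 0.5 * wh_d], dim=-1)
print(boxes[0])  # tensor([-12., -12., 20., 20.]): the 32x32 anchor centered at (4, 4)
```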
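The `init_prob = 0.01` bias initialization in `_init_layers` is the standard RetinaNet focal-loss prior: with bias `b = -log((1 - p) / p)`, every anchor's initial sigmoid classification score is approximately `p`, which keeps the overwhelming number of background anchors from dominating the loss early in training. A one-line check:

```python
import math
import torch

init_prob = 0.01
bias_value = -math.log((1. - init_prob) / init_prob)
print(torch.sigmoid(torch.tensor(bias_value)))  # tensor(0.0100)
```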