yjh0410 1 éve
szülő
commit
1b5e49e543

+ 4 - 4
config/__init__.py

@@ -117,16 +117,16 @@ def build_model_config(args):
     elif args.model in ['yolov8_n', 'yolov8_s', 'yolov8_m', 'yolov8_l', 'yolov8_x']:
         cfg = yolov8_cfg[args.model]
     # YOLOX
-    elif args.model in ['yolox_n', 'yolox_t', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x']:
+    elif args.model in ['yolox_n', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x']:
         cfg = yolox_cfg[args.model]
     # YOLOX-AdamW
-    elif args.model in ['yolox_n_adamw', 'yolox_t_adamw', 'yolox_s_adamw', 'yolox_m_adamw', 'yolox_l_adamw', 'yolox_x_adamw']:
+    elif args.model in ['yolox_n_adamw', 'yolox_s_adamw', 'yolox_m_adamw', 'yolox_l_adamw', 'yolox_x_adamw']:
         cfg = yolox_adamw_cfg[args.model]
     # RTCDet
-    elif args.model in ['rtcdet_n', 'rtcdet_t', 'rtcdet_s', 'rtcdet_m', 'rtcdet_l', 'rtcdet_x']:
+    elif args.model in ['rtcdet_n', 'rtcdet_s', 'rtcdet_m', 'rtcdet_l', 'rtcdet_x']:
         cfg = rtcdet_cfg[args.model]
     # CenterNet
-    elif args.model in ['ctrnet_n', 'ctrnet_t', 'ctrnet_s', 'ctrnet_m', 'ctrnet_l', 'ctrnet_x']:
+    elif args.model in ['ctrnet_n', 'ctrnet_s', 'ctrnet_m', 'ctrnet_l', 'ctrnet_x']:
         cfg = ctrnet_cfg[args.model]
 
     return cfg

+ 1 - 0
config/model_config/ctrnet_config.py

@@ -6,6 +6,7 @@ ctrnet_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'bk_pretrained': True,
+        'bk_pretrained_mae': True,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,

+ 8 - 53
config/model_config/rtcdet_config.py

@@ -7,6 +7,7 @@ rtcdet_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'bk_pretrained': True,
+        'bk_pretrained_mae': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
@@ -56,63 +57,11 @@ rtcdet_cfg = {
         'trainer_type': 'rtcdet',
     },
 
-    'rtcdet_t':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'bk_pretrained': True,
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_depthwise': False,
-        'width': 0.375,
-        'depth': 0.34,
-        'ratio': 2.0,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## Neck: SPP
-        'neck': 'sppf',
-        'neck_expand_ratio': 0.5,
-        'pooling_size': 5,
-        'neck_act': 'silu',
-        'neck_norm': 'BN',
-        'neck_depthwise': False,
-        ## Neck: PaFPN
-        'fpn': 'rtcdet_pafpn',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'det_head': {'name': 'decoupled_head',
-                     'num_cls_head': 2,
-                     'num_reg_head': 2,
-                     'head_act': 'silu',
-                     'head_norm': 'BN',
-                     'head_depthwise': False,  
-                     },
-        'seg_head': {'name': None,
-                     },
-        'pos_head': {'name': None,
-                     },
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolox_n',
-        # ---------------- Assignment config ----------------
-        ## Matcher
-        'matcher': "aligned_simota",
-        'matcher_hpy': {'soft_center_radius': 3.0,
-                        'topk_candidates': 13},
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 2.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
     'rtcdet_s':{
         # ---------------- Model config ----------------
         ## Backbone
         'bk_pretrained': True,
+        'bk_pretrained_mae': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
@@ -166,6 +115,7 @@ rtcdet_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'bk_pretrained': True,
+        'bk_pretrained_mae': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
@@ -219,6 +169,7 @@ rtcdet_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'bk_pretrained': True,
+        'bk_pretrained_mae': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
@@ -272,6 +223,7 @@ rtcdet_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'bk_pretrained': True,
+        'bk_pretrained_mae': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
@@ -330,6 +282,7 @@ rtcdet_seg_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'bk_pretrained': True,
+        'bk_pretrained_mae': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
@@ -388,6 +341,7 @@ rtcdet_pos_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'bk_pretrained': True,
+        'bk_pretrained_mae': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
@@ -446,6 +400,7 @@ rtcdet_seg_pos_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'bk_pretrained': True,
+        'bk_pretrained_mae': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,

+ 0 - 90
config/model_config/yolov5_config.py

@@ -47,51 +47,6 @@ yolov5_cfg = {
         'trainer_type': 'yolov8',
     },
 
-    'yolov5_t':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 0.375,
-        'depth': 0.34,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolov5_pafpn',
-        'fpn_reduce_layer': 'Conv',
-        'fpn_downsample_layer': 'Conv',
-        'fpn_core_block': 'CSPBlock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                        [30, 61],   [62, 45],   [59, 119],    # P4
-                        [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_n',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'anchor_thresh': 4.0,
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'yolov8',
-    },
-
     'yolov5_s':{
         # ---------------- Model config ----------------
         ## Backbone
@@ -320,51 +275,6 @@ yolov5_adamw_cfg = {
         'trainer_type': 'rtcdet',
     },
 
-    'yolov5_t_adamw':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 0.375,
-        'depth': 0.34,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolov5_pafpn',
-        'fpn_reduce_layer': 'Conv',
-        'fpn_downsample_layer': 'Conv',
-        'fpn_core_block': 'CSPBlock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                        [30, 61],   [62, 45],   [59, 119],    # P4
-                        [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_n',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'anchor_thresh': 4.0,
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
     'yolov5_s_adamw':{
         # ---------------- Model config ----------------
         ## Backbone

+ 10 - 86
config/model_config/yolox_config.py

@@ -6,6 +6,7 @@ yolox_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'backbone': 'cspdarknet',
+        'bk_pretrained': True,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -45,53 +46,11 @@ yolox_cfg = {
         'trainer_type': 'yolox',
     },
 
-    'yolox_t':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 0.375,
-        'depth': 0.34,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolox_pafpn',
-        'fpn_reduce_layer': 'conv',
-        'fpn_downsample_layer': 'conv',
-        'fpn_core_block': 'cspblock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.7, 1.25],   # 448 -> 800
-        'trans_type': 'yolox_n',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'matcher': {'center_sampling_radius': 2.5,
-                    'topk_candicate': 10},
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'yolox',
-    },
-
     'yolox_s':{
         # ---------------- Model config ----------------
         ## Backbone
         'backbone': 'cspdarknet',
+        'bk_pretrained': True,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -135,6 +94,7 @@ yolox_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'backbone': 'cspdarknet',
+        'bk_pretrained': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -178,6 +138,7 @@ yolox_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'backbone': 'cspdarknet',
+        'bk_pretrained': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -221,6 +182,7 @@ yolox_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'backbone': 'cspdarknet',
+        'bk_pretrained': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -267,6 +229,7 @@ yolox_adamw_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'backbone': 'cspdarknet',
+        'bk_pretrained': True,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -306,53 +269,11 @@ yolox_adamw_cfg = {
         'trainer_type': 'rtcdet',
     },
 
-    'yolox_t_adamw':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 0.375,
-        'depth': 0.34,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolox_pafpn',
-        'fpn_reduce_layer': 'conv',
-        'fpn_downsample_layer': 'conv',
-        'fpn_core_block': 'cspblock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolox_n',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'matcher': {'center_sampling_radius': 2.5,
-                    'topk_candicate': 10},
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
     'yolox_s_adamw':{
         # ---------------- Model config ----------------
         ## Backbone
         'backbone': 'cspdarknet',
+        'bk_pretrained': True,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -396,6 +317,7 @@ yolox_adamw_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'backbone': 'cspdarknet',
+        'bk_pretrained': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -439,6 +361,7 @@ yolox_adamw_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'backbone': 'cspdarknet',
+        'bk_pretrained': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -482,6 +405,7 @@ yolox_adamw_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'backbone': 'cspdarknet',
+        'bk_pretrained': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,

+ 5 - 5
models/detectors/__init__.py

@@ -40,11 +40,11 @@ def build_model(args,
         model, criterion = build_yolov4(
             args, model_cfg, device, num_classes, trainable, deploy)
     # YOLOv5   
-    elif args.model in ['yolov5_n', 'yolov5_t', 'yolov5_s', 'yolov5_m', 'yolov5_l', 'yolov5_x']:
+    elif args.model in ['yolov5_n', 'yolov5_s', 'yolov5_m', 'yolov5_l', 'yolov5_x']:
         model, criterion = build_yolov5(
             args, model_cfg, device, num_classes, trainable, deploy)
     # YOLOv5-AdamW
-    elif args.model in ['yolov5_n_adamw', 'yolov5_t_adamw', 'yolov5_s_adamw', 'yolov5_m_adamw', 'yolov5_l_adamw', 'yolov5_x_adamw']:
+    elif args.model in ['yolov5_n_adamw', 'yolov5_s_adamw', 'yolov5_m_adamw', 'yolov5_l_adamw', 'yolov5_x_adamw']:
         model, criterion = build_yolov5(
             args, model_cfg, device, num_classes, trainable, deploy)
     # YOLOv7
@@ -56,15 +56,15 @@ def build_model(args,
         model, criterion = build_yolov8(
             args, model_cfg, device, num_classes, trainable, deploy)
     # YOLOX
-    elif args.model in ['yolox_n', 'yolox_t', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x']:
+    elif args.model in ['yolox_n', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x']:
         model, criterion = build_yolox(
             args, model_cfg, device, num_classes, trainable, deploy)
     # YOLOX-AdamW
-    elif args.model in ['yolox_n_adamw', 'yolox_t_adamw', 'yolox_s_adamw', 'yolox_m_adamw', 'yolox_l_adamw', 'yolox_x_adamw']:
+    elif args.model in ['yolox_n_adamw', 'yolox_s_adamw', 'yolox_m_adamw', 'yolox_l_adamw', 'yolox_x_adamw']:
         model, criterion = build_yolox(
             args, model_cfg, device, num_classes, trainable, deploy)
     # RTCDet
-    elif args.model in ['rtcdet_n', 'rtcdet_t', 'rtcdet_s', 'rtcdet_m', 'rtcdet_l', 'rtcdet_x']:
+    elif args.model in ['rtcdet_n', 'rtcdet_s', 'rtcdet_m', 'rtcdet_l', 'rtcdet_x']:
         model, criterion = build_rtcdet(
             args, model_cfg, device, num_classes, trainable, deploy)
     # CenterNet

+ 82 - 12
models/detectors/ctrnet/ctrnet_encoder.py

@@ -7,11 +7,28 @@ except:
     from ctrnet_basic import Conv, RTCBlock
 
 
+# Pretrained weights
+model_urls = {
+    # ImageNet-1K pretrained weight
+    "rtcnet_n": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/elan_cspnet_nano.pth",
+    "rtcnet_s": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/elan_cspnet_small.pth",
+    "rtcnet_m": None,
+    "rtcnet_l": None,
+    "rtcnet_x": None,
+    # MIM-pretrained weights
+    "mae_rtcnet_n": None,
+    "mae_rtcnet_s": None,
+    "mae_rtcnet_m": None,
+    "mae_rtcnet_l": None,
+    "mae_rtcnet_x": None,
+}
+
+
 # ---------------------------- Basic functions ----------------------------
 ## Real-time Convolutional Backbone
-class CTREncoder(nn.Module):
+class RTCBackbone(nn.Module):
     def __init__(self, width=1.0, depth=1.0, ratio=1.0, act_type='silu', norm_type='BN', depthwise=False):
-        super(CTREncoder, self).__init__()
+        super(RTCBackbone, self).__init__()
         # ---------------- Basic parameters ----------------
         self.width_factor = width
         self.depth_factor = depth
@@ -78,25 +95,78 @@ class CTREncoder(nn.Module):
 
 
 # ---------------------------- Functions ----------------------------
-## build Backbone
-def build_encoder(cfg): 
+## Build Backbone network
+def build_encoder(cfg, pretrained=False): 
+    """Build the RTC backbone described by ``cfg`` and optionally load weights.
+
+    Args:
+        cfg: dict read for 'width', 'depth', 'ratio', 'bk_act', 'bk_norm',
+            'bk_depthwise' and the MAE switch 'bk_pretrained_mae'
+            (the older spelling 'bk_mae_pretrained' is accepted as a fallback).
+        pretrained: when True, pass the backbone through
+            ``load_pretrained_weight`` under the resolved model name.
+
+    Returns:
+        (backbone, feat_dims) where feat_dims is ``backbone.feat_dims[-3:]``.
+
+    Raises:
+        NotImplementedError: if (width, depth, ratio) is not a known size.
+    """
     # build backbone model
-    backbone = CTREncoder(width=cfg['width'],
-                          depth=cfg['depth'],
-                          ratio=cfg['ratio'],
-                          act_type=cfg['bk_act'],
-                          norm_type=cfg['bk_norm'],
-                          depthwise=cfg['bk_depthwise']
-                          )
+    backbone = RTCBackbone(width=cfg['width'],
+                           depth=cfg['depth'],
+                           ratio=cfg['ratio'],
+                           act_type=cfg['bk_act'],
+                           norm_type=cfg['bk_norm'],
+                           depthwise=cfg['bk_depthwise']
+                           )
     feat_dims = backbone.feat_dims[-3:]
+
+    # Model name
+    width, depth, ratio = cfg['width'], cfg['depth'], cfg['ratio']
+    # The model configs spell this key 'bk_pretrained_mae'; the previous
+    # 'bk_mae_pretrained' spelling is still honoured so older cfg dicts work.
+    use_mae = cfg.get('bk_pretrained_mae', cfg.get('bk_mae_pretrained', False))
+    # (width, depth, ratio) -> base name of the matching backbone size
+    size_to_name = {
+        (0.25,  0.34, 2.0): "rtcnet_n",
+        (0.375, 0.34, 2.0): "rtcnet_t",
+        (0.50,  0.34, 2.0): "rtcnet_s",
+        (0.75,  0.67, 1.5): "rtcnet_m",
+        (1.0,   1.0,  1.0): "rtcnet_l",
+        (1.25,  1.34, 1.0): "rtcnet_x",
+    }
+    if (width, depth, ratio) not in size_to_name:
+        raise NotImplementedError("No such model size : width={}, depth={}, ratio={}. ".format(width, depth, ratio))
+    model_name = ("mae_{}" if use_mae else "{}").format(size_to_name[(width, depth, ratio)])
+
+    # Load pretrained weight
+    if pretrained:
+        backbone = load_pretrained_weight(backbone, model_name)
         
     return backbone, feat_dims
 
+## Load pretrained weight
+def load_pretrained_weight(model, model_name):
+    """Load the checkpoint registered for ``model_name`` into ``model``.
+
+    Args:
+        model: module whose ``state_dict`` receives the weights.
+        model_name: key into ``model_urls``; a name that is missing from the
+            table is handled like an entry whose URL is None.
+
+    Returns:
+        The same ``model`` object, with weights loaded when a URL exists.
+    """
+    # .get() so that sizes without a table entry (e.g. "rtcnet_t") report
+    # "no pretrained weight" instead of raising KeyError.
+    url = model_urls.get(model_name)
+    if url is None:
+        print('No backbone pretrained for {}.'.format(model_name))
+        return model
+
+    print('Loading pretrained weight ...')
+    checkpoint = torch.hub.load_state_dict_from_url(
+        url=url, map_location="cpu", check_hash=True)
+    # checkpoint state dict
+    checkpoint_state_dict = checkpoint.pop("model")
+    # model state dict
+    model_state_dict = model.state_dict()
+    # Drop checkpoint tensors the model cannot take: unknown keys (printed)
+    # and keys whose shape differs from the model's parameter.
+    for k in list(checkpoint_state_dict.keys()):
+        if k in model_state_dict:
+            shape_model = tuple(model_state_dict[k].shape)
+            shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
+            if shape_model != shape_checkpoint:
+                checkpoint_state_dict.pop(k)
+        else:
+            checkpoint_state_dict.pop(k)
+            print(k)
+    # load the weight
+    # NOTE(review): this is a strict load, so a tensor dropped above for a
+    # shape mismatch will surface here as a missing-key error - confirm
+    # whether strict=False is intended.
+    model.load_state_dict(checkpoint_state_dict)
+
+    return model
+
 
 if __name__ == '__main__':
     import time
     from thop import profile
     cfg = {
+        'bk_pretrained': True,
+        'bk_mae_pretrained': True,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
@@ -104,7 +174,7 @@ if __name__ == '__main__':
         'depth': 1.0,
         'ratio': 1.0,
     }
-    model, feats = build_encoder(cfg)
+    model, feats = build_encoder(cfg, pretrained=cfg['bk_pretrained'])
     x = torch.randn(1, 3, 640, 640)
     t0 = time.time()
     outputs = model(x)

+ 0 - 1
models/detectors/rtcdet/README.md

@@ -3,7 +3,6 @@
 |   Model  | Batch | Scale | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
 |----------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
 | RTCDet-N | 8xb16 |  640  |                        |                   |                   |                    |  |
-| RTCDet-T | 8xb16 |  640  |                        |                   |                   |                    |  |
 | RTCDet-S | 8xb16 |  640  |                        |                   |                   |                    |  |
 | RTCDet-M | 8xb16 |  640  |                        |                   |                   |                    |  |
 | RTCDet-L | 8xb16 |  640  |                        |                   |                   |                    |  |

+ 39 - 28
models/detectors/rtcdet/rtcdet_backbone.py

@@ -7,14 +7,20 @@ except:
     from rtcdet_basic import Conv, RTCBlock
 
 
-# MIM-pretrained weights
+# Pretrained weights
 model_urls = {
-    "rtcnet_n": None,
-    "rtcnet_t": None,
-    "rtcnet_s": None,
+    # ImageNet-1K pretrained weight
+    "rtcnet_n": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/elan_cspnet_nano.pth",
+    "rtcnet_s": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/elan_cspnet_small.pth",
     "rtcnet_m": None,
     "rtcnet_l": None,
     "rtcnet_x": None,
+    # MIM-pretrained weights
+    "mae_rtcnet_n": None,
+    "mae_rtcnet_s": None,
+    "mae_rtcnet_m": None,
+    "mae_rtcnet_l": None,
+    "mae_rtcnet_x": None,
 }
 
 
@@ -89,7 +95,7 @@ class RTCBackbone(nn.Module):
 
 
 # ---------------------------- Functions ----------------------------
-## build Backbone
+## Build Backbone network
 def build_backbone(cfg, pretrained=False): 
     # build backbone model
     backbone = RTCBackbone(width=cfg['width'],
@@ -101,29 +107,32 @@ def build_backbone(cfg, pretrained=False):
                            )
     feat_dims = backbone.feat_dims[-3:]
 
-    # load pretrained weight
+    # Model name
+    width, depth, ratio = cfg['width'], cfg['depth'], cfg['ratio']
+    model_name = "{}" if not cfg['bk_pretrained_mae'] else "mae_{}"
+    if  width == 0.25   and depth == 0.34 and ratio == 2.0:
+        model_name = model_name.format("rtcnet_n")
+    elif width == 0.375 and depth == 0.34 and ratio == 2.0:
+        model_name = model_name.format("rtcnet_t")
+    elif width == 0.50  and depth == 0.34 and ratio == 2.0:
+        model_name = model_name.format("rtcnet_s")
+    elif width == 0.75  and depth == 0.67 and ratio == 1.5:
+        model_name = model_name.format("rtcnet_m")
+    elif width == 1.0   and depth == 1.0  and ratio == 1.0:
+        model_name = model_name.format("rtcnet_l")
+    elif width == 1.25  and depth == 1.34  and ratio == 1.0:
+        model_name = model_name.format("rtcnet_x")
+    else:
+        raise NotImplementedError("No such model size : width={}, depth={}, ratio={}. ".format(width, depth, ratio))
+
+    # Load pretrained weight
     if pretrained:
-        backbone = load_pretrained_weight(backbone)
+        backbone = load_pretrained_weight(backbone, model_name)
         
     return backbone, feat_dims
 
-
-def load_pretrained_weight(model):
-    # Model name
-    width, depth, ratio = model.width_factor, model.depth_factor, model.last_stage_factor
-    if width == 0.25 and depth == 0.34 and ratio == 2.0:
-        model_name = "rtcnet_n"
-    elif width == 0.375 and depth == 0.34 and ratio == 2.0:
-        model_name = "rtcnet_t"
-    elif width == 0.50 and depth == 0.34 and ratio == 2.0:
-        model_name = "rtcnet_s"
-    elif width == 0.75 and depth == 0.67 and ratio == 1.5:
-        model_name = "rtcnet_m"
-    elif width == 1.0 and depth == 1.0 and ratio == 1.0:
-        model_name = "rtcnet_l"
-    elif width == 1.25 and depth == 1.34 and ratio == 1.0:
-        model_name = "rtcnet_x"
-    
+## Load pretrained weight
+def load_pretrained_weight(model, model_name):
     # Load pretrained weight
     url = model_urls[model_name]
     if url is not None:
@@ -156,14 +165,16 @@ if __name__ == '__main__':
     import time
     from thop import profile
     cfg = {
+        'bk_pretrained': True,
+        'bk_pretrained_mae': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
-        'width': 1.0,
-        'depth': 1.0,
-        'ratio': 1.0,
+        'width': 0.25,
+        'depth': 0.34,
+        'ratio': 2.0,
     }
-    model, feats = build_backbone(cfg)
+    model, feats = build_backbone(cfg, pretrained=cfg['bk_pretrained'])
     x = torch.randn(1, 3, 640, 640)
     t0 = time.time()
     outputs = model(x)

+ 7 - 7
models/detectors/rtcdet/rtcdet_basic.py

@@ -116,22 +116,22 @@ class RTCBlock(nn.Module):
                  depthwise  = False,):
         super(RTCBlock, self).__init__()
         self.inter_dim = out_dim // 2
-        self.input_proj = Conv(in_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type)
+        self.cv1 = Conv(in_dim, self.inter_dim, k=1, norm_type=norm_type, act_type=act_type)
+        self.cv2 = Conv(in_dim, self.inter_dim, k=1, norm_type=norm_type, act_type=act_type)
         self.m = nn.Sequential(*(
             Bottleneck(self.inter_dim, self.inter_dim, 1.0, [3, 3], shortcut, act_type, norm_type, depthwise)
             for _ in range(num_blocks)))
-        self.output_proj = Conv((2 + num_blocks) * self.inter_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type)
+        self.cv3 = Conv((2 + num_blocks) * self.inter_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type)
+
 
     def forward(self, x):
-        # Input proj
-        x1, x2 = torch.chunk(self.input_proj(x), 2, dim=1)
+        # Two separate 1x1 projections of the input, each to inter_dim channels
+        x1 = self.cv1(x)
+        x2 = self.cv2(x)
         out = list([x1, x2])
 
-        # Bottlenecl
+        # Each bottleneck consumes the most recently appended feature and
+        # appends its own output, so `out` ends with 2 + num_blocks tensors
         out.extend(m(out[-1]) for m in self.m)
 
-        # Output proj
-        out = self.output_proj(torch.cat(out, dim=1))
+        # Concatenate every branch along dim 1 and fuse to out_dim channels
+        out = self.cv3(torch.cat(out, dim=1))
 
         return out
     

+ 0 - 1
models/detectors/yolov5/README.md

@@ -19,7 +19,6 @@ On the other hand, we are trying to use **AdamW** and larger batch size to train
 |   Model   | Batch | Scale | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
 |-----------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
 | YOLOv5-N  | 8xb16 |  640  |                        |                   |                   |                    |  |
-| YOLOv5-T  | 8xb16 |  640  |                        |                   |                   |                    |  |
 | YOLOv5-S  | 8xb16 |  640  |         39.2           |        57.9       |        27.3       |         9.0        | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov5_s_coco_adamw.pth) |
 | YOLOv5-M  | 8xb16 |  640  |                        |                   |                   |                    |  |
 | YOLOv5-L  | 8xb16 |  640  |                        |                   |                   |                    |  |

+ 1 - 1
models/detectors/yolov5/yolov5.py

@@ -45,7 +45,7 @@ class YOLOv5(nn.Module):
         
         # ------------------- Network Structure -------------------
         ## Backbone
-        self.backbone, feats_dim = build_backbone(cfg)
+        self.backbone, feats_dim = build_backbone(cfg, pretrained=cfg['bk_pretrained']&trainable)
         
         ## FPN
         self.fpn = build_fpn(cfg=cfg, in_dims=feats_dim, out_dim=round(256*cfg['width']))

+ 62 - 6
models/detectors/yolov5/yolov5_backbone.py

@@ -9,6 +9,16 @@ except:
     from yolov5_neck import SPPF
 
 
+# ImageNet-1K pretrained weight
+model_urls = {
+    "cspdarknet_n": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/cspdarknet_nano.pth",
+    "cspdarknet_s": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/cspdarknet_small.pth",
+    "cspdarknet_m": None,  # For Medium-level, it is not necessary to load pretrained weight.
+    "cspdarknet_l": None,  # For Large-level,  it is not necessary to load pretrained weight.
+    "cspdarknet_x": None,  # For Huge-level,   it is not necessary to load pretrained weight.
+}
+
+
 # CSPDarkNet
 class CSPDarkNet(nn.Module):
     def __init__(self, depth=1.0, width=1.0, act_type='silu', norm_type='BN', depthwise=False):
@@ -80,11 +90,57 @@ class CSPDarkNet(nn.Module):
 
 
 # ---------------------------- Functions ----------------------------
+## load pretrained weight
+def load_weight(model, model_name):
+    """Load the checkpoint registered for ``model_name`` into ``model``.
+
+    Args:
+        model: module whose ``state_dict`` receives the weights.
+        model_name: key into ``model_urls``; a name that is missing from the
+            table is handled like an entry whose URL is None.
+
+    Returns:
+        The same ``model`` object, with weights loaded when a URL exists.
+    """
+    # .get() so that a size without a table entry (e.g. 'cspdarknet_t')
+    # reports "no pretrained" instead of raising KeyError.
+    url = model_urls.get(model_name)
+    if url is None:
+        print('No pretrained for {}'.format(model_name))
+        return model
+
+    # Only announce loading once we know there is something to load.
+    print('Loading pretrained weight ...')
+    checkpoint = torch.hub.load_state_dict_from_url(
+        url=url, map_location="cpu", check_hash=True)
+    # checkpoint state dict
+    checkpoint_state_dict = checkpoint.pop("model")
+    # model state dict
+    model_state_dict = model.state_dict()
+    # Drop checkpoint tensors the model cannot take: unknown keys (printed)
+    # and keys whose shape differs from the model's parameter.
+    for k in list(checkpoint_state_dict.keys()):
+        if k in model_state_dict:
+            shape_model = tuple(model_state_dict[k].shape)
+            shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
+            if shape_model != shape_checkpoint:
+                checkpoint_state_dict.pop(k)
+        else:
+            checkpoint_state_dict.pop(k)
+            print(k)
+
+    # NOTE(review): strict load - a tensor dropped above for a shape mismatch
+    # will surface here as a missing-key error; confirm that is intended.
+    model.load_state_dict(checkpoint_state_dict)
+
+    return model
+
+
 ## build CSPDarkNet
-def build_backbone(cfg): 
+def build_backbone(cfg, pretrained=False): 
+    """Build a CSPDarkNet from ``cfg`` and optionally load pretrained weights.
+
+    Args:
+        cfg: dict read for 'depth', 'width', 'bk_act', 'bk_norm', 'bk_dpw'.
+        pretrained: when True, load the checkpoint matching (width, depth)
+            through ``load_weight``; sizes without a registered name are
+            reported and left randomly initialised.
+
+    Returns:
+        (backbone, feat_dims) where feat_dims is ``backbone.feat_dims[-3:]``.
+    """
+    # Build backbone
     backbone = CSPDarkNet(cfg['depth'], cfg['width'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
     feat_dims = backbone.feat_dims[-3:]
 
+    # Load pretrained weight
+    if pretrained:
+        # (width, depth) -> key in model_urls. The tiny size (0.375, 0.34)
+        # is deliberately absent: model_urls has no 'cspdarknet_t' entry.
+        size_to_name = {
+            (0.25, 0.34): 'cspdarknet_n',
+            (0.5,  0.34): 'cspdarknet_s',
+            (0.75, 0.67): 'cspdarknet_m',
+            (1.0,  1.0):  'cspdarknet_l',
+            (1.25, 1.34): 'cspdarknet_x',
+        }
+        model_name = size_to_name.get((cfg['width'], cfg['depth']))
+        if model_name is not None:
+            backbone = load_weight(backbone, model_name=model_name)
+        else:
+            print('No pretrained weight for width={}, depth={}.'.format(cfg['width'], cfg['depth']))
+
     return backbone, feat_dims
 
 
@@ -92,16 +148,16 @@ if __name__ == '__main__':
     import time
     from thop import profile
     cfg = {
-        'pretrained': False,
-        'bk_act': 'lrelu',
+        'bk_pretrained': True,
+        'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
         'p6_feat': False,
         'p7_feat': False,
-        'width': 1.0,
-        'depth': 1.0,
+        'width': 0.50,
+        'depth': 0.34,
     }
-    model, feats = build_backbone(cfg)
+    model, feats = build_backbone(cfg, pretrained=cfg['bk_pretrained'])
     x = torch.randn(1, 3, 224, 224)
     t0 = time.time()
     outputs = model(x)

+ 4 - 1
models/detectors/yolov5/yolov5_neck.py

@@ -1,6 +1,9 @@
 import torch
 import torch.nn as nn
-from .yolov5_basic import Conv
+try:
+    from .yolov5_basic import Conv
+except:
+    from yolov5_basic import Conv
 
 
 # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher

+ 0 - 1
models/detectors/yolox/README.md

@@ -17,7 +17,6 @@ On the other hand, we are trying to use **AdamW** to train our reproduced YOLOX.
 |   Model | Batch | Scale | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
 |---------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
 | YOLOX-N | 8xb16 |  640  |                        |                   |                   |                    |  |
-| YOLOX-T | 8xb16 |  640  |                        |                   |                   |                    |  |
 | YOLOX-S | 8xb16 |  640  |                        |                   |                   |                    |  |
 | YOLOX-M | 8xb16 |  640  |                        |                   |                   |                    |  |
 | YOLOX-L | 8xb16 |  640  |                        |                   |                   |                    |  |

+ 1 - 1
models/detectors/yolox/yolox.py

@@ -37,7 +37,7 @@ class YOLOX(nn.Module):
                 
         # ------------------- Network Structure -------------------
         ## 主干网络
-        self.backbone, feats_dim = build_backbone(cfg)
+        self.backbone, feats_dim = build_backbone(cfg, pretrained=cfg['bk_pretrained']&trainable)
         
         ## 特征金字塔
         self.fpn = build_fpn(cfg=cfg, in_dims=feats_dim, out_dim=round(256*cfg['width']))

+ 57 - 2
models/detectors/yolox/yolox_backbone.py

@@ -9,6 +9,16 @@ except:
     from yolox_neck import SPPF
 
 
+# ImageNet-1K pretrained weight URLs, keyed by the model_name passed to load_weight(); a None value means no checkpoint is downloaded for that size
+model_urls = {
+    "cspdarknet_n": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/cspdarknet_n.pth",
+    "cspdarknet_s": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/cspdarknet_s.pth",
+    "cspdarknet_m": None,  # For Medium-level, it is not necessary to load pretrained weight.
+    "cspdarknet_l": None,  # For Large-level,  it is not necessary to load pretrained weight.
+    "cspdarknet_x": None,  # For Huge-level,   it is not necessary to load pretrained weight.
+}
+
+
 # CSPDarkNet
 class CSPDarkNet(nn.Module):
     def __init__(self, depth=1.0, width=1.0, act_type='silu', norm_type='BN', depthwise=False):
@@ -80,11 +90,55 @@ class CSPDarkNet(nn.Module):
 
 
 # ---------------------------- Functions ----------------------------
+## load pretrained weight
+def load_weight(model, model_name):
+    # load weight
+    print('Loading pretrained weight ...')
+    url = model_urls[model_name]
+    if url is not None:
+        checkpoint = torch.hub.load_state_dict_from_url(
+            url=url, map_location="cpu", check_hash=True)
+        # checkpoint state dict
+        checkpoint_state_dict = checkpoint.pop("model")
+        # model state dict
+        model_state_dict = model.state_dict()
+        # check
+        for k in list(checkpoint_state_dict.keys()):
+            if k in model_state_dict:
+                shape_model = tuple(model_state_dict[k].shape)
+                shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
+                if shape_model != shape_checkpoint:
+                    checkpoint_state_dict.pop(k)
+            else:
+                checkpoint_state_dict.pop(k)
+                print(k)
+
+        model.load_state_dict(checkpoint_state_dict)
+    else:
+        print('No pretrained for {}'.format(model_name))
+
+    return model
+
+
 ## build CSPDarkNet
-def build_backbone(cfg): 
+def build_backbone(cfg, pretrained=False): 
+    # Build backbone: CSPDarkNet configured by cfg's depth/width multipliers, activation, norm and depthwise flag
     backbone = CSPDarkNet(cfg['depth'], cfg['width'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
     feat_dims = backbone.feat_dims[-3:]
 
+    # Load pretrained weight: the exact (width, depth) pair picks the checkpoint name; a pair not listed below loads nothing
+    if pretrained:
+        if cfg['width'] == 0.25 and cfg['depth'] == 0.34:
+            backbone = load_weight(backbone, model_name='cspdarknet_n')
+        elif cfg['width'] == 0.5 and cfg['depth'] == 0.34:
+            backbone = load_weight(backbone, model_name='cspdarknet_s')
+        elif cfg['width'] == 0.75 and cfg['depth'] == 0.67:
+            backbone = load_weight(backbone, model_name='cspdarknet_m')  # m/l/x map to None in model_urls, so load_weight only prints a notice
+        elif cfg['width'] == 1.0 and cfg['depth'] == 1.0:
+            backbone = load_weight(backbone, model_name='cspdarknet_l')
+        elif cfg['width'] == 1.25 and cfg['depth'] == 1.34:
+            backbone = load_weight(backbone, model_name='cspdarknet_x')
+
     return backbone, feat_dims
 
 
@@ -92,6 +146,7 @@ if __name__ == '__main__':
     import time
     from thop import profile
     cfg = {
+        'bk_pretrained': True,
         'bk_act': 'lrelu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -100,7 +155,7 @@ if __name__ == '__main__':
         'width': 1.0,
         'depth': 1.0,
     }
-    model, feats = build_backbone(cfg)
+    model, feats = build_backbone(cfg, pretrained=cfg['bk_pretrained'])
     x = torch.randn(1, 3, 640, 640)
     t0 = time.time()
     outputs = model(x)