1 éve · 1b5e49e543
--- a/config/__init__.py
+++ b/config/__init__.py
@@ -117,16 +117,16 @@ def build_model_config(args):
 
				     elif args.model in ['yolov8_n', 'yolov8_s', 'yolov8_m', 'yolov8_l', 'yolov8_x']:
			
 
				         cfg = yolov8_cfg[args.model]
			
 
				     # YOLOX
			
 
				-    elif args.model in ['yolox_n', 'yolox_t', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x']:
			
 
				+    elif args.model in ['yolox_n', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x']:
			
 
				         cfg = yolox_cfg[args.model]
			
 
				     # YOLOX-AdamW
			
 
				-    elif args.model in ['yolox_n_adamw', 'yolox_t_adamw', 'yolox_s_adamw', 'yolox_m_adamw', 'yolox_l_adamw', 'yolox_x_adamw']:
			
 
				+    elif args.model in ['yolox_n_adamw', 'yolox_s_adamw', 'yolox_m_adamw', 'yolox_l_adamw', 'yolox_x_adamw']:
			
 
				         cfg = yolox_adamw_cfg[args.model]
			
 
				     # RTCDet
			
 
				-    elif args.model in ['rtcdet_n', 'rtcdet_t', 'rtcdet_s', 'rtcdet_m', 'rtcdet_l', 'rtcdet_x']:
			
 
				+    elif args.model in ['rtcdet_n', 'rtcdet_s', 'rtcdet_m', 'rtcdet_l', 'rtcdet_x']:
			
 
				         cfg = rtcdet_cfg[args.model]
			
 
				     # CenterNet
			
 
				-    elif args.model in ['ctrnet_n', 'ctrnet_t', 'ctrnet_s', 'ctrnet_m', 'ctrnet_l', 'ctrnet_x']:
			
 
				+    elif args.model in ['ctrnet_n', 'ctrnet_s', 'ctrnet_m', 'ctrnet_l', 'ctrnet_x']:
			
 
				         cfg = ctrnet_cfg[args.model]
			
 
				 
			
 
				     return cfg
			
--- a/config/model_config/ctrnet_config.py
+++ b/config/model_config/ctrnet_config.py
@@ -6,6 +6,7 @@ ctrnet_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'bk_pretrained': True,
			
 
				+        'bk_pretrained_mae': True,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_depthwise': False,
			
--- a/config/model_config/rtcdet_config.py
+++ b/config/model_config/rtcdet_config.py
@@ -7,6 +7,7 @@ rtcdet_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'bk_pretrained': True,
			
 
				+        'bk_pretrained_mae': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_depthwise': False,
			
@@ -56,63 +57,11 @@ rtcdet_cfg = {
 
				         'trainer_type': 'rtcdet',
			
 
				     },
			
 
				 
			
 
				-    'rtcdet_t':{
			
 
				-        # ---------------- Model config ----------------
			
 
				-        ## Backbone
			
 
				-        'bk_pretrained': True,
			
 
				-        'bk_act': 'silu',
			
 
				-        'bk_norm': 'BN',
			
 
				-        'bk_depthwise': False,
			
 
				-        'width': 0.375,
			
 
				-        'depth': 0.34,
			
 
				-        'ratio': 2.0,
			
 
				-        'stride': [8, 16, 32],  # P3, P4, P5
			
 
				-        'max_stride': 32,
			
 
				-        ## Neck: SPP
			
 
				-        'neck': 'sppf',
			
 
				-        'neck_expand_ratio': 0.5,
			
 
				-        'pooling_size': 5,
			
 
				-        'neck_act': 'silu',
			
 
				-        'neck_norm': 'BN',
			
 
				-        'neck_depthwise': False,
			
 
				-        ## Neck: PaFPN
			
 
				-        'fpn': 'rtcdet_pafpn',
			
 
				-        'fpn_act': 'silu',
			
 
				-        'fpn_norm': 'BN',
			
 
				-        'fpn_depthwise': False,
			
 
				-        ## Head
			
 
				-        'det_head': {'name': 'decoupled_head',
			
 
				-                     'num_cls_head': 2,
			
 
				-                     'num_reg_head': 2,
			
 
				-                     'head_act': 'silu',
			
 
				-                     'head_norm': 'BN',
			
 
				-                     'head_depthwise': False,  
			
 
				-                     },
			
 
				-        'seg_head': {'name': None,
			
 
				-                     },
			
 
				-        'pos_head': {'name': None,
			
 
				-                     },
			
 
				-        # ---------------- Train config ----------------
			
 
				-        ## input
			
 
				-        'multi_scale': [0.5, 1.25],   # 320 -> 800
			
 
				-        'trans_type': 'yolox_n',
			
 
				-        # ---------------- Assignment config ----------------
			
 
				-        ## Matcher
			
 
				-        'matcher': "aligned_simota",
			
 
				-        'matcher_hpy': {'soft_center_radius': 3.0,
			
 
				-                        'topk_candidates': 13},
			
 
				-        # ---------------- Loss config ----------------
			
 
				-        ## loss weight
			
 
				-        'loss_cls_weight': 1.0,
			
 
				-        'loss_box_weight': 2.0,
			
 
				-        # ---------------- Train config ----------------
			
 
				-        'trainer_type': 'rtcdet',
			
 
				-    },
			
 
				-
			
 
				     'rtcdet_s':{
			
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'bk_pretrained': True,
			
 
				+        'bk_pretrained_mae': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_depthwise': False,
			
@@ -166,6 +115,7 @@ rtcdet_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'bk_pretrained': True,
			
 
				+        'bk_pretrained_mae': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_depthwise': False,
			
@@ -219,6 +169,7 @@ rtcdet_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'bk_pretrained': True,
			
 
				+        'bk_pretrained_mae': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_depthwise': False,
			
@@ -272,6 +223,7 @@ rtcdet_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'bk_pretrained': True,
			
 
				+        'bk_pretrained_mae': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_depthwise': False,
			
@@ -330,6 +282,7 @@ rtcdet_seg_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'bk_pretrained': True,
			
 
				+        'bk_pretrained_mae': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_depthwise': False,
			
@@ -388,6 +341,7 @@ rtcdet_pos_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'bk_pretrained': True,
			
 
				+        'bk_pretrained_mae': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_depthwise': False,
			
@@ -446,6 +400,7 @@ rtcdet_seg_pos_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'bk_pretrained': True,
			
 
				+        'bk_pretrained_mae': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_depthwise': False,
			
--- a/config/model_config/yolov5_config.py
+++ b/config/model_config/yolov5_config.py
@@ -47,51 +47,6 @@ yolov5_cfg = {
 
				         'trainer_type': 'yolov8',
			
 
				     },
			
 
				 
			
 
				-    'yolov5_t':{
			
 
				-        # ---------------- Model config ----------------
			
 
				-        ## Backbone
			
 
				-        'backbone': 'cspdarknet',
			
 
				-        'bk_act': 'silu',
			
 
				-        'bk_norm': 'BN',
			
 
				-        'bk_dpw': False,
			
 
				-        'width': 0.375,
			
 
				-        'depth': 0.34,
			
 
				-        'stride': [8, 16, 32],  # P3, P4, P5
			
 
				-        'max_stride': 32,
			
 
				-        ## FPN
			
 
				-        'fpn': 'yolov5_pafpn',
			
 
				-        'fpn_reduce_layer': 'Conv',
			
 
				-        'fpn_downsample_layer': 'Conv',
			
 
				-        'fpn_core_block': 'CSPBlock',
			
 
				-        'fpn_act': 'silu',
			
 
				-        'fpn_norm': 'BN',
			
 
				-        'fpn_depthwise': False,
			
 
				-        ## Head
			
 
				-        'head': 'decoupled_head',
			
 
				-        'head_act': 'silu',
			
 
				-        'head_norm': 'BN',
			
 
				-        'num_cls_head': 2,
			
 
				-        'num_reg_head': 2,
			
 
				-        'head_depthwise': False,
			
 
				-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
			
 
				-                        [30, 61],   [62, 45],   [59, 119],    # P4
			
 
				-                        [116, 90],  [156, 198], [373, 326]],  # P5
			
 
				-        # ---------------- Train config ----------------
			
 
				-        ## input
			
 
				-        'multi_scale': [0.5, 1.25],   # 320 -> 800
			
 
				-        'trans_type': 'yolov5_n',
			
 
				-        # ---------------- Assignment config ----------------
			
 
				-        ## matcher
			
 
				-        'anchor_thresh': 4.0,
			
 
				-        # ---------------- Loss config ----------------
			
 
				-        ## loss weight
			
 
				-        'loss_obj_weight': 1.0,
			
 
				-        'loss_cls_weight': 1.0,
			
 
				-        'loss_box_weight': 5.0,
			
 
				-        # ---------------- Train config ----------------
			
 
				-        'trainer_type': 'yolov8',
			
 
				-    },
			
 
				-
			
 
				     'yolov5_s':{
			
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
@@ -320,51 +275,6 @@ yolov5_adamw_cfg = {
 
				         'trainer_type': 'rtcdet',
			
 
				     },
			
 
				 
			
 
				-    'yolov5_t_adamw':{
			
 
				-        # ---------------- Model config ----------------
			
 
				-        ## Backbone
			
 
				-        'backbone': 'cspdarknet',
			
 
				-        'bk_act': 'silu',
			
 
				-        'bk_norm': 'BN',
			
 
				-        'bk_dpw': False,
			
 
				-        'width': 0.375,
			
 
				-        'depth': 0.34,
			
 
				-        'stride': [8, 16, 32],  # P3, P4, P5
			
 
				-        'max_stride': 32,
			
 
				-        ## FPN
			
 
				-        'fpn': 'yolov5_pafpn',
			
 
				-        'fpn_reduce_layer': 'Conv',
			
 
				-        'fpn_downsample_layer': 'Conv',
			
 
				-        'fpn_core_block': 'CSPBlock',
			
 
				-        'fpn_act': 'silu',
			
 
				-        'fpn_norm': 'BN',
			
 
				-        'fpn_depthwise': False,
			
 
				-        ## Head
			
 
				-        'head': 'decoupled_head',
			
 
				-        'head_act': 'silu',
			
 
				-        'head_norm': 'BN',
			
 
				-        'num_cls_head': 2,
			
 
				-        'num_reg_head': 2,
			
 
				-        'head_depthwise': False,
			
 
				-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
			
 
				-                        [30, 61],   [62, 45],   [59, 119],    # P4
			
 
				-                        [116, 90],  [156, 198], [373, 326]],  # P5
			
 
				-        # ---------------- Train config ----------------
			
 
				-        ## input
			
 
				-        'multi_scale': [0.5, 1.25],   # 320 -> 800
			
 
				-        'trans_type': 'yolov5_n',
			
 
				-        # ---------------- Assignment config ----------------
			
 
				-        ## matcher
			
 
				-        'anchor_thresh': 4.0,
			
 
				-        # ---------------- Loss config ----------------
			
 
				-        ## loss weight
			
 
				-        'loss_obj_weight': 1.0,
			
 
				-        'loss_cls_weight': 1.0,
			
 
				-        'loss_box_weight': 5.0,
			
 
				-        # ---------------- Train config ----------------
			
 
				-        'trainer_type': 'rtcdet',
			
 
				-    },
			
 
				-
			
 
				     'yolov5_s_adamw':{
			
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
--- a/config/model_config/yolox_config.py
+++ b/config/model_config/yolox_config.py
@@ -6,6 +6,7 @@ yolox_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'backbone': 'cspdarknet',
			
 
				+        'bk_pretrained': True,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_dpw': False,
			
@@ -45,53 +46,11 @@ yolox_cfg = {
 
				         'trainer_type': 'yolox',
			
 
				     },
			
 
				 
			
 
				-    'yolox_t':{
			
 
				-        # ---------------- Model config ----------------
			
 
				-        ## Backbone
			
 
				-        'backbone': 'cspdarknet',
			
 
				-        'bk_act': 'silu',
			
 
				-        'bk_norm': 'BN',
			
 
				-        'bk_dpw': False,
			
 
				-        'width': 0.375,
			
 
				-        'depth': 0.34,
			
 
				-        'stride': [8, 16, 32],  # P3, P4, P5
			
 
				-        'max_stride': 32,
			
 
				-        ## FPN
			
 
				-        'fpn': 'yolox_pafpn',
			
 
				-        'fpn_reduce_layer': 'conv',
			
 
				-        'fpn_downsample_layer': 'conv',
			
 
				-        'fpn_core_block': 'cspblock',
			
 
				-        'fpn_act': 'silu',
			
 
				-        'fpn_norm': 'BN',
			
 
				-        'fpn_depthwise': False,
			
 
				-        ## Head
			
 
				-        'head': 'decoupled_head',
			
 
				-        'head_act': 'silu',
			
 
				-        'head_norm': 'BN',
			
 
				-        'num_cls_head': 2,
			
 
				-        'num_reg_head': 2,
			
 
				-        'head_depthwise': False,
			
 
				-        # ---------------- Train config ----------------
			
 
				-        ## input
			
 
				-        'multi_scale': [0.7, 1.25],   # 448 -> 800
			
 
				-        'trans_type': 'yolox_n',
			
 
				-        # ---------------- Assignment config ----------------
			
 
				-        ## matcher
			
 
				-        'matcher': {'center_sampling_radius': 2.5,
			
 
				-                    'topk_candicate': 10},
			
 
				-        # ---------------- Loss config ----------------
			
 
				-        ## loss weight
			
 
				-        'loss_obj_weight': 1.0,
			
 
				-        'loss_cls_weight': 1.0,
			
 
				-        'loss_box_weight': 5.0,
			
 
				-        # ---------------- Train config ----------------
			
 
				-        'trainer_type': 'yolox',
			
 
				-    },
			
 
				-
			
 
				     'yolox_s':{
			
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'backbone': 'cspdarknet',
			
 
				+        'bk_pretrained': True,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_dpw': False,
			
@@ -135,6 +94,7 @@ yolox_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'backbone': 'cspdarknet',
			
 
				+        'bk_pretrained': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_dpw': False,
			
@@ -178,6 +138,7 @@ yolox_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'backbone': 'cspdarknet',
			
 
				+        'bk_pretrained': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_dpw': False,
			
@@ -221,6 +182,7 @@ yolox_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'backbone': 'cspdarknet',
			
 
				+        'bk_pretrained': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_dpw': False,
			
@@ -267,6 +229,7 @@ yolox_adamw_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'backbone': 'cspdarknet',
			
 
				+        'bk_pretrained': True,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_dpw': False,
			
@@ -306,53 +269,11 @@ yolox_adamw_cfg = {
 
				         'trainer_type': 'rtcdet',
			
 
				     },
			
 
				 
			
 
				-    'yolox_t_adamw':{
			
 
				-        # ---------------- Model config ----------------
			
 
				-        ## Backbone
			
 
				-        'backbone': 'cspdarknet',
			
 
				-        'bk_act': 'silu',
			
 
				-        'bk_norm': 'BN',
			
 
				-        'bk_dpw': False,
			
 
				-        'width': 0.375,
			
 
				-        'depth': 0.34,
			
 
				-        'stride': [8, 16, 32],  # P3, P4, P5
			
 
				-        'max_stride': 32,
			
 
				-        ## FPN
			
 
				-        'fpn': 'yolox_pafpn',
			
 
				-        'fpn_reduce_layer': 'conv',
			
 
				-        'fpn_downsample_layer': 'conv',
			
 
				-        'fpn_core_block': 'cspblock',
			
 
				-        'fpn_act': 'silu',
			
 
				-        'fpn_norm': 'BN',
			
 
				-        'fpn_depthwise': False,
			
 
				-        ## Head
			
 
				-        'head': 'decoupled_head',
			
 
				-        'head_act': 'silu',
			
 
				-        'head_norm': 'BN',
			
 
				-        'num_cls_head': 2,
			
 
				-        'num_reg_head': 2,
			
 
				-        'head_depthwise': False,
			
 
				-        # ---------------- Train config ----------------
			
 
				-        ## input
			
 
				-        'multi_scale': [0.5, 1.25],   # 320 -> 800
			
 
				-        'trans_type': 'yolox_n',
			
 
				-        # ---------------- Assignment config ----------------
			
 
				-        ## matcher
			
 
				-        'matcher': {'center_sampling_radius': 2.5,
			
 
				-                    'topk_candicate': 10},
			
 
				-        # ---------------- Loss config ----------------
			
 
				-        ## loss weight
			
 
				-        'loss_obj_weight': 1.0,
			
 
				-        'loss_cls_weight': 1.0,
			
 
				-        'loss_box_weight': 5.0,
			
 
				-        # ---------------- Train config ----------------
			
 
				-        'trainer_type': 'rtcdet',
			
 
				-    },
			
 
				-
			
 
				     'yolox_s_adamw':{
			
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'backbone': 'cspdarknet',
			
 
				+        'bk_pretrained': True,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_dpw': False,
			
@@ -396,6 +317,7 @@ yolox_adamw_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'backbone': 'cspdarknet',
			
 
				+        'bk_pretrained': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_dpw': False,
			
@@ -439,6 +361,7 @@ yolox_adamw_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'backbone': 'cspdarknet',
			
 
				+        'bk_pretrained': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_dpw': False,
			
@@ -482,6 +405,7 @@ yolox_adamw_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
 
				         'backbone': 'cspdarknet',
			
 
				+        'bk_pretrained': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_dpw': False,
			
--- a/models/detectors/__init__.py
+++ b/models/detectors/__init__.py
@@ -40,11 +40,11 @@ def build_model(args,
 
				         model, criterion = build_yolov4(
			
 
				             args, model_cfg, device, num_classes, trainable, deploy)
			
 
				     # YOLOv5   
			
 
				-    elif args.model in ['yolov5_n', 'yolov5_t', 'yolov5_s', 'yolov5_m', 'yolov5_l', 'yolov5_x']:
			
 
				+    elif args.model in ['yolov5_n', 'yolov5_s', 'yolov5_m', 'yolov5_l', 'yolov5_x']:
			
 
				         model, criterion = build_yolov5(
			
 
				             args, model_cfg, device, num_classes, trainable, deploy)
			
 
				     # YOLOv5-AdamW
			
 
				-    elif args.model in ['yolov5_n_adamw', 'yolov5_t_adamw', 'yolov5_s_adamw', 'yolov5_m_adamw', 'yolov5_l_adamw', 'yolov5_x_adamw']:
			
 
				+    elif args.model in ['yolov5_n_adamw', 'yolov5_s_adamw', 'yolov5_m_adamw', 'yolov5_l_adamw', 'yolov5_x_adamw']:
			
 
				         model, criterion = build_yolov5(
			
 
				             args, model_cfg, device, num_classes, trainable, deploy)
			
 
				     # YOLOv7
			
@@ -56,15 +56,15 @@ def build_model(args,
 
				         model, criterion = build_yolov8(
			
 
				             args, model_cfg, device, num_classes, trainable, deploy)
			
 
				     # YOLOX
			
 
				-    elif args.model in ['yolox_n', 'yolox_t', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x']:
			
 
				+    elif args.model in ['yolox_n', 'yolox_s', 'yolox_m', 'yolox_l', 'yolox_x']:
			
 
				         model, criterion = build_yolox(
			
 
				             args, model_cfg, device, num_classes, trainable, deploy)
			
 
				     # YOLOX-AdamW
			
 
				-    elif args.model in ['yolox_n_adamw', 'yolox_t_adamw', 'yolox_s_adamw', 'yolox_m_adamw', 'yolox_l_adamw', 'yolox_x_adamw']:
			
 
				+    elif args.model in ['yolox_n_adamw', 'yolox_s_adamw', 'yolox_m_adamw', 'yolox_l_adamw', 'yolox_x_adamw']:
			
 
				         model, criterion = build_yolox(
			
 
				             args, model_cfg, device, num_classes, trainable, deploy)
			
 
				     # RTCDet
			
 
				-    elif args.model in ['rtcdet_n', 'rtcdet_t', 'rtcdet_s', 'rtcdet_m', 'rtcdet_l', 'rtcdet_x']:
			
 
				+    elif args.model in ['rtcdet_n', 'rtcdet_s', 'rtcdet_m', 'rtcdet_l', 'rtcdet_x']:
			
 
				         model, criterion = build_rtcdet(
			
 
				             args, model_cfg, device, num_classes, trainable, deploy)
			
 
				     # CenterNet
			
--- a/models/detectors/ctrnet/ctrnet_encoder.py
+++ b/models/detectors/ctrnet/ctrnet_encoder.py
@@ -7,11 +7,28 @@ except:
 
				     from ctrnet_basic import Conv, RTCBlock
			
 
				 
			
 
				 
			
 
				+# Pretrained weights
			
 
				+model_urls = {
			
 
				+    # ImageNet-1K pretrained weight
			
 
				+    "rtcnet_n": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/elan_cspnet_nano.pth",
			
 
				+    "rtcnet_s": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/elan_cspnet_small.pth",
			
 
				+    "rtcnet_m": None,
			
 
				+    "rtcnet_l": None,
			
 
				+    "rtcnet_x": None,
			
 
				+    # MIM-pretrained weights
			
 
				+    "mae_rtcnet_n": None,
			
 
				+    "mae_rtcnet_s": None,
			
 
				+    "mae_rtcnet_m": None,
			
 
				+    "mae_rtcnet_l": None,
			
 
				+    "mae_rtcnet_x": None,
			
 
				+}
			
 
				+
			
 
				+
			
 
				 # ---------------------------- Basic functions ----------------------------
			
 
				 ## Real-time Convolutional Backbone
			
 
				-class CTREncoder(nn.Module):
			
 
				+class RTCBackbone(nn.Module):
			
 
				     def __init__(self, width=1.0, depth=1.0, ratio=1.0, act_type='silu', norm_type='BN', depthwise=False):
			
 
				-        super(CTREncoder, self).__init__()
			
 
				+        super(RTCBackbone, self).__init__()
			
 
				         # ---------------- Basic parameters ----------------
			
 
				         self.width_factor = width
			
 
				         self.depth_factor = depth
			
@@ -78,25 +95,78 @@ class CTREncoder(nn.Module):
 
				 
			
 
				 
			
 
				 # ---------------------------- Functions ----------------------------
			
 
				-## build Backbone
			
 
				-def build_encoder(cfg): 
			
 
				+## Build Backbone network
			
 
				+def build_encoder(cfg, pretrained=False): 
			
 
				     # build backbone model
			
 
				-    backbone = CTREncoder(width=cfg['width'],
			
 
				-                          depth=cfg['depth'],
			
 
				-                          ratio=cfg['ratio'],
			
 
				-                          act_type=cfg['bk_act'],
			
 
				-                          norm_type=cfg['bk_norm'],
			
 
				-                          depthwise=cfg['bk_depthwise']
			
 
				-                          )
			
 
				+    backbone = RTCBackbone(width=cfg['width'],
			
 
				+                           depth=cfg['depth'],
			
 
				+                           ratio=cfg['ratio'],
			
 
				+                           act_type=cfg['bk_act'],
			
 
				+                           norm_type=cfg['bk_norm'],
			
 
				+                           depthwise=cfg['bk_depthwise']
			
 
				+                           )
			
 
				     feat_dims = backbone.feat_dims[-3:]
			
 
				+
			
 
				+    # Model name
			
 
				+    width, depth, ratio = cfg['width'], cfg['depth'], cfg['ratio']
			
 
				+    model_name = "{}" if not cfg['bk_mae_pretrained'] else "mae_{}"
			
 
				+    if  width == 0.25   and depth == 0.34 and ratio == 2.0:
			
 
				+        model_name = model_name.format("rtcnet_n")
			
 
				+    elif width == 0.375 and depth == 0.34 and ratio == 2.0:
			
 
				+        model_name = model_name.format("rtcnet_t")
			
 
				+    elif width == 0.50  and depth == 0.34 and ratio == 2.0:
			
 
				+        model_name = model_name.format("rtcnet_s")
			
 
				+    elif width == 0.75  and depth == 0.67 and ratio == 1.5:
			
 
				+        model_name = model_name.format("rtcnet_m")
			
 
				+    elif width == 1.0   and depth == 1.0  and ratio == 1.0:
			
 
				+        model_name = model_name.format("rtcnet_l")
			
 
				+    elif width == 1.25  and depth == 1.34  and ratio == 1.0:
			
 
				+        model_name = model_name.format("rtcnet_x")
			
 
				+    else:
			
 
				+        raise NotImplementedError("No such model size : width={}, depth={}, ratio={}. ".format(width, depth, ratio))
			
 
				+
			
 
				+    # Load pretrained weight
			
 
				+    if pretrained:
			
 
				+        backbone = load_pretrained_weight(backbone, model_name)
			
 
				         
			
 
				     return backbone, feat_dims
			
 
				 
			
 
				+## Load pretrained weight
			
 
				+def load_pretrained_weight(model, model_name):
			
 
				+    # Load pretrained weight
			
 
				+    url = model_urls[model_name]
			
 
				+    if url is not None:
			
 
				+        print('Loading pretrained weight ...')
			
 
				+        checkpoint = torch.hub.load_state_dict_from_url(
			
 
				+            url=url, map_location="cpu", check_hash=True)
			
 
				+        # checkpoint state dict
			
 
				+        checkpoint_state_dict = checkpoint.pop("model")
			
 
				+        # model state dict
			
 
				+        model_state_dict = model.state_dict()
			
 
				+        # check
			
 
				+        for k in list(checkpoint_state_dict.keys()):
			
 
				+            if k in model_state_dict:
			
 
				+                shape_model = tuple(model_state_dict[k].shape)
			
 
				+                shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
			
 
				+                if shape_model != shape_checkpoint:
			
 
				+                    checkpoint_state_dict.pop(k)
			
 
				+            else:
			
 
				+                checkpoint_state_dict.pop(k)
			
 
				+                print(k)
			
 
				+        # load the weight
			
 
				+        model.load_state_dict(checkpoint_state_dict)
			
 
				+    else:
			
 
				+        print('No backbone pretrained for {}.'.format(model_name))
			
 
				+
			
 
				+    return model
			
 
				+
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				     import time
			
 
				     from thop import profile
			
 
				     cfg = {
			
 
				+        'bk_pretrained': True,
			
 
				+        'bk_mae_pretrained': True,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_depthwise': False,
			
@@ -104,7 +174,7 @@ if __name__ == '__main__':
 
				         'depth': 1.0,
			
 
				         'ratio': 1.0,
			
 
				     }
			
 
				-    model, feats = build_encoder(cfg)
			
 
				+    model, feats = build_encoder(cfg, pretrained=cfg['bk_pretrained'])
			
 
				     x = torch.randn(1, 3, 640, 640)
			
 
				     t0 = time.time()
			
 
				     outputs = model(x)
			
--- a/models/detectors/rtcdet/README.md
+++ b/models/detectors/rtcdet/README.md
@@ -3,7 +3,6 @@
 
				 |   Model  | Batch | Scale | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
			
 
				 |----------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
			
 
				 | RTCDet-N | 8xb16 |  640  |                        |                   |                   |                    |  |
			
 
				-| RTCDet-T | 8xb16 |  640  |                        |                   |                   |                    |  |
			
 
				 | RTCDet-S | 8xb16 |  640  |                        |                   |                   |                    |  |
			
 
				 | RTCDet-M | 8xb16 |  640  |                        |                   |                   |                    |  |
			
 
				 | RTCDet-L | 8xb16 |  640  |                        |                   |                   |                    |  |
			
--- a/models/detectors/rtcdet/rtcdet_backbone.py
+++ b/models/detectors/rtcdet/rtcdet_backbone.py
@@ -7,14 +7,20 @@ except:
 
				     from rtcdet_basic import Conv, RTCBlock
			
 
				 
			
 
				 
			
 
				-# MIM-pretrained weights
			
 
				+# Pretrained weights
			
 
				 model_urls = {
			
 
				-    "rtcnet_n": None,
			
 
				-    "rtcnet_t": None,
			
 
				-    "rtcnet_s": None,
			
 
				+    # ImageNet-1K pretrained weight
			
 
				+    "rtcnet_n": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/elan_cspnet_nano.pth",
			
 
				+    "rtcnet_s": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/elan_cspnet_small.pth",
			
 
				     "rtcnet_m": None,
			
 
				     "rtcnet_l": None,
			
 
				     "rtcnet_x": None,
			
 
				+    # MIM-pretrained weights
			
 
				+    "mae_rtcnet_n": None,
			
 
				+    "mae_rtcnet_s": None,
			
 
				+    "mae_rtcnet_m": None,
			
 
				+    "mae_rtcnet_l": None,
			
 
				+    "mae_rtcnet_x": None,
			
 
				 }
			
 
				 
			
 
				 
			
@@ -89,7 +95,7 @@ class RTCBackbone(nn.Module):
 
				 
			
 
				 
			
 
				 # ---------------------------- Functions ----------------------------
			
 
				-## build Backbone
			
 
				+## Build Backbone network
			
 
				 def build_backbone(cfg, pretrained=False): 
			
 
				     # build backbone model
			
 
				     backbone = RTCBackbone(width=cfg['width'],
			
@@ -101,29 +107,32 @@ def build_backbone(cfg, pretrained=False):
 
				                            )
			
 
				     feat_dims = backbone.feat_dims[-3:]
			
 
				 
			
 
				-    # load pretrained weight
			
 
				+    # Model name
			
 
				+    width, depth, ratio = cfg['width'], cfg['depth'], cfg['ratio']
			
 
				+    model_name = "{}" if not cfg['bk_pretrained_mae'] else "mae_{}"
			
 
				+    if  width == 0.25   and depth == 0.34 and ratio == 2.0:
			
 
				+        model_name = model_name.format("rtcnet_n")
			
 
				+    elif width == 0.375 and depth == 0.34 and ratio == 2.0:
			
 
				+        model_name = model_name.format("rtcnet_t")
			
 
				+    elif width == 0.50  and depth == 0.34 and ratio == 2.0:
			
 
				+        model_name = model_name.format("rtcnet_s")
			
 
				+    elif width == 0.75  and depth == 0.67 and ratio == 1.5:
			
 
				+        model_name = model_name.format("rtcnet_m")
			
 
				+    elif width == 1.0   and depth == 1.0  and ratio == 1.0:
			
 
				+        model_name = model_name.format("rtcnet_l")
			
 
				+    elif width == 1.25  and depth == 1.34  and ratio == 1.0:
			
 
				+        model_name = model_name.format("rtcnet_x")
			
 
				+    else:
			
 
				+        raise NotImplementedError("No such model size : width={}, depth={}, ratio={}. ".format(width, depth, ratio))
			
 
				+
			
 
				+    # Load pretrained weight
			
 
				     if pretrained:
			
 
				-        backbone = load_pretrained_weight(backbone)
			
 
				+        backbone = load_pretrained_weight(backbone, model_name)
			
 
				         
			
 
				     return backbone, feat_dims
			
 
				 
			
 
				-
			
 
				-def load_pretrained_weight(model):
			
 
				-    # Model name
			
 
				-    width, depth, ratio = model.width_factor, model.depth_factor, model.last_stage_factor
			
 
				-    if width == 0.25 and depth == 0.34 and ratio == 2.0:
			
 
				-        model_name = "rtcnet_n"
			
 
				-    elif width == 0.375 and depth == 0.34 and ratio == 2.0:
			
 
				-        model_name = "rtcnet_t"
			
 
				-    elif width == 0.50 and depth == 0.34 and ratio == 2.0:
			
 
				-        model_name = "rtcnet_s"
			
 
				-    elif width == 0.75 and depth == 0.67 and ratio == 1.5:
			
 
				-        model_name = "rtcnet_m"
			
 
				-    elif width == 1.0 and depth == 1.0 and ratio == 1.0:
			
 
				-        model_name = "rtcnet_l"
			
 
				-    elif width == 1.25 and depth == 1.34 and ratio == 1.0:
			
 
				-        model_name = "rtcnet_x"
			
 
				-    
			
 
				+## Load pretrained weight
			
 
				+def load_pretrained_weight(model, model_name):
			
 
				     # Load pretrained weight
			
 
				     url = model_urls[model_name]
			
 
				     if url is not None:
			
@@ -156,14 +165,16 @@ if __name__ == '__main__':
 
				     import time
			
 
				     from thop import profile
			
 
				     cfg = {
			
 
				+        'bk_pretrained': True,
			
 
				+        'bk_pretrained_mae': False,
			
 
				         'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_depthwise': False,
			
 
				-        'width': 1.0,
			
 
				-        'depth': 1.0,
			
 
				-        'ratio': 1.0,
			
 
				+        'width': 0.25,
			
 
				+        'depth': 0.34,
			
 
				+        'ratio': 2.0,
			
 
				     }
			
 
				-    model, feats = build_backbone(cfg)
			
 
				+    model, feats = build_backbone(cfg, pretrained=cfg['bk_pretrained'])
			
 
				     x = torch.randn(1, 3, 640, 640)
			
 
				     t0 = time.time()
			
 
				     outputs = model(x)
			
--- a/models/detectors/rtcdet/rtcdet_basic.py
+++ b/models/detectors/rtcdet/rtcdet_basic.py
@@ -116,22 +116,22 @@ class RTCBlock(nn.Module):
 
				                  depthwise  = False,):
			
 
				         super(RTCBlock, self).__init__()
			
 
				         self.inter_dim = out_dim // 2
			
 
				-        self.input_proj = Conv(in_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type)
			
 
				+        self.cv1 = Conv(in_dim, self.inter_dim, k=1, norm_type=norm_type, act_type=act_type)
			
 
				+        self.cv2 = Conv(in_dim, self.inter_dim, k=1, norm_type=norm_type, act_type=act_type)
			
 
				         self.m = nn.Sequential(*(
			
 
				             Bottleneck(self.inter_dim, self.inter_dim, 1.0, [3, 3], shortcut, act_type, norm_type, depthwise)
			
 
				             for _ in range(num_blocks)))
			
 
				-        self.output_proj = Conv((2 + num_blocks) * self.inter_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type)
			
 
				+        self.cv3 = Conv((2 + num_blocks) * self.inter_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type)
			
 
				+
			
 
				 
			
 
				     def forward(self, x):
			
 
				-        # Input proj
			
 
				-        x1, x2 = torch.chunk(self.input_proj(x), 2, dim=1)
			
 
				+        x1 = self.cv1(x)
			
 
				+        x2 = self.cv2(x)
			
 
				         out = list([x1, x2])
			
 
				 
			
 
				-        # Bottlenecl
			
 
				         out.extend(m(out[-1]) for m in self.m)
			
 
				 
			
 
				-        # Output proj
			
 
				-        out = self.output_proj(torch.cat(out, dim=1))
			
 
				+        out = self.cv3(torch.cat(out, dim=1))
			
 
				 
			
 
				         return out
			
 
				     
			
--- a/models/detectors/yolov5/README.md
+++ b/models/detectors/yolov5/README.md
@@ -19,7 +19,6 @@ On the other hand, we are trying to use **AdamW** and larger batch size to train
 
				 |   Model   | Batch | Scale | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
			
 
				 |-----------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
			
 
				 | YOLOv5-N  | 8xb16 |  640  |                        |                   |                   |                    |  |
			
 
				-| YOLOv5-T  | 8xb16 |  640  |                        |                   |                   |                    |  |
			
 
				 | YOLOv5-S  | 8xb16 |  640  |         39.2           |        57.9       |        27.3       |         9.0        | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov5_s_coco_adamw.pth) |
			
 
				 | YOLOv5-M  | 8xb16 |  640  |                        |                   |                   |                    |  |
			
 
				 | YOLOv5-L  | 8xb16 |  640  |                        |                   |                   |                    |  |
			
--- a/models/detectors/yolov5/yolov5.py
+++ b/models/detectors/yolov5/yolov5.py
@@ -45,7 +45,7 @@ class YOLOv5(nn.Module):
 
				         
			
 
				         # ------------------- Network Structure -------------------
			
 
				         ## Backbone
			
 
				-        self.backbone, feats_dim = build_backbone(cfg)
			
 
				+        self.backbone, feats_dim = build_backbone(cfg, pretrained=cfg['bk_pretrained']&trainable)
			
 
				         
			
 
				         ## FPN
			
 
				         self.fpn = build_fpn(cfg=cfg, in_dims=feats_dim, out_dim=round(256*cfg['width']))
			
--- a/models/detectors/yolov5/yolov5_backbone.py
+++ b/models/detectors/yolov5/yolov5_backbone.py
@@ -9,6 +9,16 @@ except:
 
				     from yolov5_neck import SPPF
			
 
				 
			
 
				 
			
 
				+# ImageNet-1K pretrained weight
			
 
				+model_urls = {
			
 
				+    "cspdarknet_n": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/cspdarknet_nano.pth",
			
 
				+    "cspdarknet_s": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/cspdarknet_small.pth",
			
 
				+    "cspdarknet_m": None,  # For Medium-level, it is not necessary to load pretrained weight.
			
 
				+    "cspdarknet_l": None,  # For Large-level,  it is not necessary to load pretrained weight.
			
 
				+    "cspdarknet_x": None,  # For Huge-level,   it is not necessary to load pretrained weight.
			
 
				+}
			
 
				+
			
 
				+
			
 
				 # CSPDarkNet
			
 
				 class CSPDarkNet(nn.Module):
			
 
				     def __init__(self, depth=1.0, width=1.0, act_type='silu', norm_type='BN', depthwise=False):
			
@@ -80,11 +90,57 @@ class CSPDarkNet(nn.Module):
 
				 
			
 
				 
			
 
				 # ---------------------------- Functions ----------------------------
			
 
				+## load pretrained weight
			
 
				+def load_weight(model, model_name):
			
 
				+    # load weight
			
 
				+    print('Loading pretrained weight ...')
			
 
				+    url = model_urls[model_name]
			
 
				+    if url is not None:
			
 
				+        checkpoint = torch.hub.load_state_dict_from_url(
			
 
				+            url=url, map_location="cpu", check_hash=True)
			
 
				+        # checkpoint state dict
			
 
				+        checkpoint_state_dict = checkpoint.pop("model")
			
 
				+        # model state dict
			
 
				+        model_state_dict = model.state_dict()
			
 
				+        # check
			
 
				+        for k in list(checkpoint_state_dict.keys()):
			
 
				+            if k in model_state_dict:
			
 
				+                shape_model = tuple(model_state_dict[k].shape)
			
 
				+                shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
			
 
				+                if shape_model != shape_checkpoint:
			
 
				+                    checkpoint_state_dict.pop(k)
			
 
				+            else:
			
 
				+                checkpoint_state_dict.pop(k)
			
 
				+                print(k)
			
 
				+
			
 
				+        model.load_state_dict(checkpoint_state_dict)
			
 
				+    else:
			
 
				+        print('No pretrained for {}'.format(model_name))
			
 
				+
			
 
				+    return model
			
 
				+
			
 
				+
			
 
				 ## build CSPDarkNet
			
 
				-def build_backbone(cfg): 
			
 
				+def build_backbone(cfg, pretrained=False): 
			
 
				+    # Build backbone
			
 
				     backbone = CSPDarkNet(cfg['depth'], cfg['width'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
			
 
				     feat_dims = backbone.feat_dims[-3:]
			
 
				 
			
 
				+    # Load pretrained weight
			
 
				+    if pretrained:
			
 
				+        if cfg['width'] == 0.25 and cfg['depth'] == 0.34:
			
 
				+            backbone = load_weight(backbone, model_name='cspdarknet_n')
			
 
				+        elif cfg['width'] == 0.375 and cfg['depth'] == 0.34:
			
 
				+            backbone = load_weight(backbone, model_name='cspdarknet_t')
			
 
				+        elif cfg['width'] == 0.5 and cfg['depth'] == 0.34:
			
 
				+            backbone = load_weight(backbone, model_name='cspdarknet_s')
			
 
				+        elif cfg['width'] == 0.75 and cfg['depth'] == 0.67:
			
 
				+            backbone = load_weight(backbone, model_name='cspdarknet_m')
			
 
				+        elif cfg['width'] == 1.0 and cfg['depth'] == 1.0:
			
 
				+            backbone = load_weight(backbone, model_name='cspdarknet_l')
			
 
				+        elif cfg['width'] == 1.25 and cfg['depth'] == 1.34:
			
 
				+            backbone = load_weight(backbone, model_name='cspdarknet_x')
			
 
				+
			
 
				     return backbone, feat_dims
			
 
				 
			
 
				 
			
@@ -92,16 +148,16 @@ if __name__ == '__main__':
 
				     import time
			
 
				     from thop import profile
			
 
				     cfg = {
			
 
				-        'pretrained': False,
			
 
				-        'bk_act': 'lrelu',
			
 
				+        'bk_pretrained': True,
			
 
				+        'bk_act': 'silu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_dpw': False,
			
 
				         'p6_feat': False,
			
 
				         'p7_feat': False,
			
 
				-        'width': 1.0,
			
 
				-        'depth': 1.0,
			
 
				+        'width': 0.50,
			
 
				+        'depth': 0.34,
			
 
				     }
			
 
				-    model, feats = build_backbone(cfg)
			
 
				+    model, feats = build_backbone(cfg, pretrained=cfg['bk_pretrained'])
			
 
				     x = torch.randn(1, 3, 224, 224)
			
 
				     t0 = time.time()
			
 
				     outputs = model(x)
			
--- a/models/detectors/yolov5/yolov5_neck.py
+++ b/models/detectors/yolov5/yolov5_neck.py
@@ -1,6 +1,9 @@
 
				 import torch
			
 
				 import torch.nn as nn
			
 
				-from .yolov5_basic import Conv
			
 
				+try:
			
 
				+    from .yolov5_basic import Conv
			
 
				+except:
			
 
				+    from yolov5_basic import Conv
			
 
				 
			
 
				 
			
 
				 # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
			
--- a/models/detectors/yolox/README.md
+++ b/models/detectors/yolox/README.md
@@ -17,7 +17,6 @@ On the other hand, we are trying to use **AdamW** to train our reproduced YOLOX.
 
				 |   Model | Batch | Scale | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
			
 
				 |---------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
			
 
				 | YOLOX-N | 8xb16 |  640  |                        |                   |                   |                    |  |
			
 
				-| YOLOX-T | 8xb16 |  640  |                        |                   |                   |                    |  |
			
 
				 | YOLOX-S | 8xb16 |  640  |                        |                   |                   |                    |  |
			
 
				 | YOLOX-M | 8xb16 |  640  |                        |                   |                   |                    |  |
			
 
				 | YOLOX-L | 8xb16 |  640  |                        |                   |                   |                    |  |
			
--- a/models/detectors/yolox/yolox.py
+++ b/models/detectors/yolox/yolox.py
@@ -37,7 +37,7 @@ class YOLOX(nn.Module):
 
				                 
			
 
				         # ------------------- Network Structure -------------------
			
 
				         ## 主干网络
			
 
				-        self.backbone, feats_dim = build_backbone(cfg)
			
 
				+        self.backbone, feats_dim = build_backbone(cfg, pretrained=cfg['bk_pretrained']&trainable)
			
 
				         
			
 
				         ## 特征金字塔
			
 
				         self.fpn = build_fpn(cfg=cfg, in_dims=feats_dim, out_dim=round(256*cfg['width']))
			
--- a/models/detectors/yolox/yolox_backbone.py
+++ b/models/detectors/yolox/yolox_backbone.py
@@ -9,6 +9,16 @@ except:
 
				     from yolox_neck import SPPF
			
 
				 
			
 
				 
			
 
				+# ImageNet-1K pretrained weight
			
 
				+model_urls = {
			
 
				+    "cspdarknet_n": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/cspdarknet_n.pth",
			
 
				+    "cspdarknet_s": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/cspdarknet_s.pth",
			
 
				+    "cspdarknet_m": None,  # For Medium-level, it is not necessary to load pretrained weight.
			
 
				+    "cspdarknet_l": None,  # For Large-level,  it is not necessary to load pretrained weight.
			
 
				+    "cspdarknet_x": None,  # For Huge-level,   it is not necessary to load pretrained weight.
			
 
				+}
			
 
				+
			
 
				+
			
 
				 # CSPDarkNet
			
 
				 class CSPDarkNet(nn.Module):
			
 
				     def __init__(self, depth=1.0, width=1.0, act_type='silu', norm_type='BN', depthwise=False):
			
@@ -80,11 +90,55 @@ class CSPDarkNet(nn.Module):
 
				 
			
 
				 
			
 
				 # ---------------------------- Functions ----------------------------
			
 
				+## load pretrained weight
			
 
				+def load_weight(model, model_name):
			
 
				+    # load weight
			
 
				+    print('Loading pretrained weight ...')
			
 
				+    url = model_urls[model_name]
			
 
				+    if url is not None:
			
 
				+        checkpoint = torch.hub.load_state_dict_from_url(
			
 
				+            url=url, map_location="cpu", check_hash=True)
			
 
				+        # checkpoint state dict
			
 
				+        checkpoint_state_dict = checkpoint.pop("model")
			
 
				+        # model state dict
			
 
				+        model_state_dict = model.state_dict()
			
 
				+        # check
			
 
				+        for k in list(checkpoint_state_dict.keys()):
			
 
				+            if k in model_state_dict:
			
 
				+                shape_model = tuple(model_state_dict[k].shape)
			
 
				+                shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
			
 
				+                if shape_model != shape_checkpoint:
			
 
				+                    checkpoint_state_dict.pop(k)
			
 
				+            else:
			
 
				+                checkpoint_state_dict.pop(k)
			
 
				+                print(k)
			
 
				+
			
 
				+        model.load_state_dict(checkpoint_state_dict)
			
 
				+    else:
			
 
				+        print('No pretrained for {}'.format(model_name))
			
 
				+
			
 
				+    return model
			
 
				+
			
 
				+
			
 
				 ## build CSPDarkNet
			
 
				-def build_backbone(cfg): 
			
 
				+def build_backbone(cfg, pretrained=False): 
			
 
				+    # Build backbone
			
 
				     backbone = CSPDarkNet(cfg['depth'], cfg['width'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
			
 
				     feat_dims = backbone.feat_dims[-3:]
			
 
				 
			
 
				+    # Load pretrained weight
			
 
				+    if pretrained:
			
 
				+        if cfg['width'] == 0.25 and cfg['depth'] == 0.34:
			
 
				+            backbone = load_weight(backbone, model_name='cspdarknet_n')
			
 
				+        elif cfg['width'] == 0.5 and cfg['depth'] == 0.34:
			
 
				+            backbone = load_weight(backbone, model_name='cspdarknet_s')
			
 
				+        elif cfg['width'] == 0.75 and cfg['depth'] == 0.67:
			
 
				+            backbone = load_weight(backbone, model_name='cspdarknet_m')
			
 
				+        elif cfg['width'] == 1.0 and cfg['depth'] == 1.0:
			
 
				+            backbone = load_weight(backbone, model_name='cspdarknet_l')
			
 
				+        elif cfg['width'] == 1.25 and cfg['depth'] == 1.34:
			
 
				+            backbone = load_weight(backbone, model_name='cspdarknet_x')
			
 
				+
			
 
				     return backbone, feat_dims
			
 
				 
			
 
				 
			
@@ -92,6 +146,7 @@ if __name__ == '__main__':
 
				     import time
			
 
				     from thop import profile
			
 
				     cfg = {
			
 
				+        'bk_pretrained': True,
			
 
				         'bk_act': 'lrelu',
			
 
				         'bk_norm': 'BN',
			
 
				         'bk_dpw': False,
			
@@ -100,7 +155,7 @@ if __name__ == '__main__':
 
				         'width': 1.0,
			
 
				         'depth': 1.0,
			
 
				     }
			
 
				-    model, feats = build_backbone(cfg)
			
 
				+    model, feats = build_backbone(cfg, pretrained=cfg['bk_pretrained'])
			
 
				     x = torch.randn(1, 3, 640, 640)
			
 
				     t0 = time.time()
			
 
				     outputs = model(x)