
Optimize code

yjh0410 1 year ago
parent
commit 9dbbba8664

+ 23 - 30
config/__init__.py

@@ -16,13 +16,15 @@ def build_dataset_config(args):
 
 # ------------------ Transform Config ------------------
 from .data_config.transform_config import (
+    # SSD-Style
+    ssd_trans_config,
     # YOLOv5-Style
-    yolov5_p_trans_config,
-    yolov5_n_trans_config,
-    yolov5_s_trans_config,
-    yolov5_m_trans_config,
-    yolov5_l_trans_config,
-    yolov5_x_trans_config,
+    yolo_p_trans_config,
+    yolo_n_trans_config,
+    yolo_s_trans_config,
+    yolo_m_trans_config,
+    yolo_l_trans_config,
+    yolo_x_trans_config,
     # YOLOX-Style
     yolox_p_trans_config,
     yolox_n_trans_config,
@@ -30,10 +32,6 @@ from .data_config.transform_config import (
     yolox_m_trans_config,
     yolox_l_trans_config,
     yolox_x_trans_config,
-    # SSD-Style
-    ssd_trans_config,
-    # RT-DETR style
-    rtdetr_base_trans_config,
 )
 
 def build_trans_config(trans_config='ssd'):
@@ -45,18 +43,18 @@ def build_trans_config(trans_config='ssd'):
         cfg = ssd_trans_config
 
     # YOLOv5-style transform 
-    elif trans_config == 'yolov5_p':
-        cfg = yolov5_p_trans_config
-    elif trans_config == 'yolov5_n':
-        cfg = yolov5_n_trans_config
-    elif trans_config == 'yolov5_s':
-        cfg = yolov5_s_trans_config
-    elif trans_config == 'yolov5_m':
-        cfg = yolov5_m_trans_config
-    elif trans_config == 'yolov5_l':
-        cfg = yolov5_l_trans_config
-    elif trans_config == 'yolov5_x':
-        cfg = yolov5_x_trans_config
+    elif trans_config == 'yolo_p':
+        cfg = yolo_p_trans_config
+    elif trans_config == 'yolo_n':
+        cfg = yolo_n_trans_config
+    elif trans_config == 'yolo_s':
+        cfg = yolo_s_trans_config
+    elif trans_config == 'yolo_m':
+        cfg = yolo_m_trans_config
+    elif trans_config == 'yolo_l':
+        cfg = yolo_l_trans_config
+    elif trans_config == 'yolo_x':
+        cfg = yolo_x_trans_config
         
     # YOLOX-style transform 
     elif trans_config == 'yolox_p':
@@ -72,10 +70,8 @@ def build_trans_config(trans_config='ssd'):
     elif trans_config == 'yolox_x':
         cfg = yolox_x_trans_config
 
-    # RT-DETR style
-    elif trans_config == 'rtdetr_base':
-        cfg = rtdetr_base_trans_config
-
+    else:
+        raise NotImplementedError("Unknown transform config: {}".format(trans_config))
     print('Transform Config: {} \n'.format(cfg))
 
     return cfg
@@ -87,7 +83,7 @@ from .model_config.yolov1_config import yolov1_cfg
 from .model_config.yolov2_config import yolov2_cfg
 from .model_config.yolov3_config import yolov3_cfg
 from .model_config.yolov4_config import yolov4_cfg
-from .model_config.yolov5_config import yolov5_cfg, yolov5_adamw_cfg
+from .model_config.yolov5_config import yolov5_cfg
 from .model_config.yolov7_config import yolov7_cfg
 from .model_config.yolov8_config import yolov8_cfg
 from .model_config.yolox_config  import yolox_cfg
@@ -110,9 +106,6 @@ def build_model_config(args):
     # YOLOv5
     elif args.model in ['yolov5_n', 'yolov5_s', 'yolov5_m', 'yolov5_l', 'yolov5_x']:
         cfg = yolov5_cfg[args.model]
-    # YOLOv5-AdamW
-    elif args.model in ['yolov5_n_adamw', 'yolov5_s_adamw', 'yolov5_m_adamw', 'yolov5_l_adamw', 'yolov5_x_adamw']:
-        cfg = yolov5_adamw_cfg[args.model]
     # YOLOv7
     elif args.model in ['yolov7_tiny', 'yolov7', 'yolov7_x']:
         cfg = yolov7_cfg[args.model]

+ 31 - 71
config/data_config/transform_config.py

@@ -1,9 +1,22 @@
 # Transform config
 
 
+# ----------------------- SSD-Style Transform -----------------------
+ssd_trans_config = {
+    'aug_type': 'ssd',
+    'use_ablu': False,
+    # Mosaic & Mixup are not used for SSD-style augmentation
+    'mosaic_prob': 0.0,
+    'mixup_prob':  0.0,
+    'mosaic_type': 'yolov5',
+    'mixup_type':  'yolov5',
+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
+}
+
+
 # ----------------------- YOLOv5-Style Transform -----------------------
-yolov5_x_trans_config = {
-    'aug_type': 'yolov5',
+yolo_x_trans_config = {
+    'aug_type': 'yolo',
     'use_ablu': True,
     # Basic Augment
     'affine_params': {
@@ -17,7 +30,6 @@ yolov5_x_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  0.2,
     'mosaic_type': 'yolov5',
@@ -25,8 +37,8 @@ yolov5_x_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
 }
 
-yolov5_l_trans_config = {
-    'aug_type': 'yolov5',
+yolo_l_trans_config = {
+    'aug_type': 'yolo',
     'use_ablu': True,
     # Basic Augment
     'affine_params': {
@@ -40,7 +52,6 @@ yolov5_l_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  0.15,
     'mosaic_type': 'yolov5',
@@ -48,8 +59,8 @@ yolov5_l_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
 }
 
-yolov5_m_trans_config = {
-    'aug_type': 'yolov5',
+yolo_m_trans_config = {
+    'aug_type': 'yolo',
     'use_ablu': True,
     # Basic Augment
     'affine_params': {
@@ -63,7 +74,6 @@ yolov5_m_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  0.10,
     'mosaic_type': 'yolov5',
@@ -71,8 +81,8 @@ yolov5_m_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
 }
 
-yolov5_s_trans_config = {
-    'aug_type': 'yolov5',
+yolo_s_trans_config = {
+    'aug_type': 'yolo',
     'use_ablu': True,
     # Basic Augment
     'affine_params': {
@@ -86,7 +96,6 @@ yolov5_s_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  0.0,
     'mosaic_type': 'yolov5',
@@ -94,8 +103,8 @@ yolov5_s_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
 }
 
-yolov5_n_trans_config = {
-    'aug_type': 'yolov5',
+yolo_n_trans_config = {
+    'aug_type': 'yolo',
     'use_ablu': True,
     # Basic Augment
     'affine_params': {
@@ -109,7 +118,6 @@ yolov5_n_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  0.0,
     'mosaic_type': 'yolov5',
@@ -117,8 +125,8 @@ yolov5_n_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
 }
 
-yolov5_p_trans_config = {
-    'aug_type': 'yolov5',
+yolo_p_trans_config = {
+    'aug_type': 'yolo',
     'use_ablu': True,
     # Basic Augment
     'affine_params': {
@@ -132,7 +140,6 @@ yolov5_p_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 0.5,
     'mixup_prob':  0.0,
     'mosaic_type': 'yolov5',
@@ -143,7 +150,7 @@ yolov5_p_trans_config = {
 
 # ----------------------- YOLOX-Style Transform -----------------------
 yolox_x_trans_config = {
-    'aug_type': 'yolov5',
+    'aug_type': 'yolo',
     'use_ablu': False,
     # Basic Augment
     'affine_params': {
@@ -157,7 +164,6 @@ yolox_x_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  1.0,
     'mosaic_type': 'yolov5',
@@ -166,7 +172,7 @@ yolox_x_trans_config = {
 }
 
 yolox_l_trans_config = {
-    'aug_type': 'yolov5',
+    'aug_type': 'yolo',
     'use_ablu': False,
     # Basic Augment
     'affine_params': {
@@ -180,7 +186,6 @@ yolox_l_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  1.0,
     'mosaic_type': 'yolov5',
@@ -189,7 +194,7 @@ yolox_l_trans_config = {
 }
 
 yolox_m_trans_config = {
-    'aug_type': 'yolov5',
+    'aug_type': 'yolo',
     'use_ablu': False,
     # Basic Augment
     'affine_params': {
@@ -203,7 +208,6 @@ yolox_m_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  1.0,
     'mosaic_type': 'yolov5',
@@ -212,7 +216,7 @@ yolox_m_trans_config = {
 }
 
 yolox_s_trans_config = {
-    'aug_type': 'yolov5',
+    'aug_type': 'yolo',
     'use_ablu': False,
     # Basic Augment
     'affine_params': {
@@ -226,7 +230,6 @@ yolox_s_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  1.0,
     'mosaic_type': 'yolov5',
@@ -235,7 +238,7 @@ yolox_s_trans_config = {
 }
 
 yolox_n_trans_config = {
-    'aug_type': 'yolov5',
+    'aug_type': 'yolo',
     'use_ablu': False,
     # Basic Augment
     'affine_params': {
@@ -249,7 +252,6 @@ yolox_n_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  0.5,
     'mosaic_type': 'yolov5',
@@ -258,7 +260,7 @@ yolox_n_trans_config = {
 }
 
 yolox_p_trans_config = {
-    'aug_type': 'yolov5',
+    'aug_type': 'yolo',
     'use_ablu': False,
     # Basic Augment
     'affine_params': {
@@ -272,51 +274,9 @@ yolox_p_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 0.5,
     'mixup_prob':  0.0,
     'mosaic_type': 'yolov5',
     'mixup_type':  'yolox',
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
 }
-
-
-# ----------------------- SSD-Style Transform -----------------------
-ssd_trans_config = {
-    'aug_type': 'ssd',
-    'use_ablu': False,
-    # Mosaic & Mixup are not used for SSD-style augmentation
-    'mosaic_keep_ratio': False,
-    'mosaic_prob': 0.0,
-    'mixup_prob':  0.0,
-    'mosaic_type': 'yolov5',
-    'mixup_type':  'yolov5',
-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
-}
-
-
-# ----------------------- SSD-Style Transform -----------------------
-rtdetr_base_trans_config = {
-    'aug_type': 'rtdetr',
-    'use_ablu': True,
-    'pixel_mean': [123.675, 116.28, 103.53],  # IN-1K statistics
-    'pixel_std':  [58.395, 57.12, 57.375],    # IN-1K statistics
-    # Basic Augment
-    'affine_params': {
-        'degrees': 0.0,
-        'translate': 0.2,
-        'scale': [0.1, 2.0],
-        'shear': 0.0,
-        'perspective': 0.0,
-        'hsv_h': 0.015,
-        'hsv_s': 0.7,
-        'hsv_v': 0.4,
-    },
-    # Mosaic & Mixup
-    'mosaic_keep_ratio': False,
-    'mosaic_prob': 0.0,
-    'mixup_prob':  0.0,
-    'mosaic_type': 'yolov5',
-    'mixup_type':  'yolov5',
-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
-}
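
After the reshuffle, the SSD config leads the file and every YOLOv5/YOLOX-style dict shares 'aug_type': 'yolo', with the 'mosaic_keep_ratio' key removed throughout. A quick sanity check over three representatives (a sketch; run from the repo root):

from config.data_config.transform_config import (
    ssd_trans_config, yolo_s_trans_config, yolox_s_trans_config)

for name, cfg in [('ssd',     ssd_trans_config),
                  ('yolo_s',  yolo_s_trans_config),
                  ('yolox_s', yolox_s_trans_config)]:
    assert 'mosaic_keep_ratio' not in cfg      # key dropped in this commit
    print(name, cfg['aug_type'], cfg['mosaic_prob'], cfg['mixup_prob'])
# ssd     ssd  0.0 0.0
# yolo_s  yolo 1.0 0.0
# yolox_s yolo 1.0 1.0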

+ 13 - 11
config/model_config/yolov1_config.py

@@ -1,32 +1,34 @@
 # YOLOv1 Config
 
 yolov1_cfg = {
-    # input
-    'trans_type': 'ssd',
-    'multi_scale': [0.5, 1.5],
-    # model
+    # ---------------- Model config ----------------
+    ## Backbone
     'backbone': 'resnet18',
     'pretrained': True,
     'stride': 32,  # P5
     'max_stride': 32,
-    # neck
+    ## Neck
     'neck': 'sppf',
-    'expand_ratio': 0.5,
-    'pooling_size': 5,
     'neck_act': 'lrelu',
     'neck_norm': 'BN',
     'neck_depthwise': False,
-    # head
+    'expand_ratio': 0.5,
+    'pooling_size': 5,
+    ## Head
     'head': 'decoupled_head',
     'head_act': 'lrelu',
     'head_norm': 'BN',
     'num_cls_head': 2,
     'num_reg_head': 2,
     'head_depthwise': False,
-    # loss weight
+    # ---------------- Data process config ----------------
+    ## Input
+    'multi_scale': [0.5, 1.5], # 320 -> 960
+    'trans_type': 'ssd',
+    # ---------------- Loss config ----------------
     'loss_obj_weight': 1.0,
     'loss_cls_weight': 1.0,
     'loss_box_weight': 5.0,
-    # training configuration
-    'trainer_type': 'yolov8',
+    # ---------------- Trainer config ----------------
+    'trainer_type': 'yolo',
 }
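
The new '# 320 -> 960' comments read multi_scale as fractions of a 640 base resolution. A worked check, plus one plausible sampling step rounded to max_stride (the base size and the sampling scheme are assumptions, not part of this diff):

import random
from config.model_config.yolov1_config import yolov1_cfg

img_size   = 640                            # assumed base input size
max_stride = yolov1_cfg['max_stride']       # 32
lo, hi     = yolov1_cfg['multi_scale']      # [0.5, 1.5]
print(int(lo * img_size), int(hi * img_size))        # 320 960

new_size = random.randint(int(lo * img_size),
                          int(hi * img_size)) // max_stride * max_stride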

+ 15 - 13
config/model_config/yolov2_config.py

@@ -1,22 +1,20 @@
 # YOLOv2 Config
 
 yolov2_cfg = {
-    # input
-    'trans_type': 'ssd',
-    'multi_scale': [0.5, 1.5],
-    # model
+    # ---------------- Model config ----------------
+    ## Backbone
     'backbone': 'darknet19',
     'pretrained': True,
     'stride': 32,  # P5
     'max_stride': 32,
-    # neck
+    ## Neck
     'neck': 'sppf',
-    'expand_ratio': 0.5,
-    'pooling_size': 5,
     'neck_act': 'lrelu',
     'neck_norm': 'BN',
     'neck_depthwise': False,
-    # head
+    'expand_ratio': 0.5,
+    'pooling_size': 5,
+    ## Head
     'head': 'decoupled_head',
     'head_act': 'lrelu',
     'head_norm': 'BN',
@@ -27,13 +25,17 @@ yolov2_cfg = {
                     [55,  75],
                     [92,  206],
                     [202, 21],
-                    [289, 311]],  # 416
-    # matcher
+                    [289, 311]],  # 416 scale
+    # ---------------- Data process config ----------------
+    ## Input
+    'multi_scale': [0.5, 1.5], # 320 -> 960
+    'trans_type': 'ssd',
+    # ---------------- Matcher config ----------------
     'iou_thresh': 0.5,
-    # loss weight
+    # ---------------- Loss config ----------------
     'loss_obj_weight': 1.0,
     'loss_cls_weight': 1.0,
     'loss_box_weight': 5.0,
-    # training configuration
-    'trainer_type': 'yolov8',
+    # ---------------- Trainer config ----------------
+    'trainer_type': 'yolo',
 }
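
The anchor_size list is annotated '# 416 scale', i.e. the boxes were tuned for a 416x416 input. If anchors scale linearly with resolution (an assumption; this diff does not show whether the repo rescales them internally), converting them to another training size is simple arithmetic:

from config.model_config.yolov2_config import yolov2_cfg

train_size = 640                            # hypothetical input size
scale = train_size / 416                    # anchors were tuned at 416
anchors = [[round(w * scale), round(h * scale)]
           for w, h in yolov2_cfg['anchor_size']]
print(anchors)                              # e.g. [55, 75] -> [85, 115]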

+ 10 - 11
config/model_config/yolov3_config.py

@@ -12,11 +12,11 @@ yolov3_cfg = {
         'max_stride': 32,
         ## Neck
         'neck': 'sppf',
-        'expand_ratio': 0.5,
-        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
         ## FPN
         'fpn': 'yolov3_fpn',
         'fpn_act': 'silu',
@@ -32,9 +32,8 @@ yolov3_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'trans_type': 'yolov5_l',
+        # ---------------- Data process config ----------------
+        'trans_type': 'yolo_l',
         'multi_scale': [0.5, 1.25],  # 320 -> 800
         # ---------------- Assignment config ----------------
         ## matcher
@@ -45,7 +44,7 @@ yolov3_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov3_tiny':{
@@ -59,11 +58,11 @@ yolov3_cfg = {
         'max_stride': 32,
         ## Neck
         'neck': 'sppf',
-        'expand_ratio': 0.5,
-        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
         ## FPN
         'fpn': 'yolov3_fpn',
         'fpn_act': 'silu',
@@ -79,9 +78,9 @@ yolov3_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
+        # ---------------- Data process config ----------------
         ## input
-        'trans_type': 'yolov5_n',
+        'trans_type': 'yolo_n',
         'multi_scale': [0.5, 1.25],  # 320 -> 800
         # ---------------- Assignment config ----------------
         ## matcher
@@ -92,7 +91,7 @@ yolov3_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
 }

+ 8 - 10
config/model_config/yolov4_config.py

@@ -32,9 +32,8 @@ yolov4_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'trans_type': 'yolov5_l',
+        # ---------------- Data process config ----------------
+        'trans_type': 'yolo_l',
         'multi_scale': [0.5, 1.25],  # 320 -> 800
         # ---------------- Assignment config ----------------
         ## matcher
@@ -45,7 +44,7 @@ yolov4_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov4_tiny':{
@@ -59,11 +58,11 @@ yolov4_cfg = {
         'max_stride': 32,
         ## Neck
         'neck': 'csp_sppf',
-        'expand_ratio': 0.5,
-        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
         ## FPN
         'fpn': 'yolov4_pafpn',
         'fpn_act': 'silu',
@@ -79,9 +78,8 @@ yolov4_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'trans_type': 'yolov5_n',
+        # ---------------- Data process config ----------------
+        'trans_type': 'yolo_n',
         'multi_scale': [0.5, 1.25],  # 320 -> 800
         # ---------------- Assignment config ----------------
         ## matcher
@@ -92,7 +90,7 @@ yolov4_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
 }

+ 15 - 244
config/model_config/yolov5_config.py

@@ -31,10 +31,10 @@ yolov5_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
+        # ---------------- Data process config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_n',
+        'trans_type': 'yolo_n',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -44,7 +44,7 @@ yolov5_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'yolov8',
+        'trainer_type': 'yolo',
     },
 
     'yolov5_s':{
@@ -76,10 +76,10 @@ yolov5_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
+        # ---------------- Data process config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_s',
+        'trans_type': 'yolo_s',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -89,7 +89,7 @@ yolov5_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'yolov8',
+        'trainer_type': 'yolo',
     },
 
     'yolov5_m':{
@@ -121,10 +121,10 @@ yolov5_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
+        # ---------------- Data process config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_m',
+        'trans_type': 'yolo_m',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -134,7 +134,7 @@ yolov5_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'yolov8',
+        'trainer_type': 'yolo',
     },
 
     'yolov5_l':{
@@ -166,10 +166,10 @@ yolov5_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
+        # ---------------- Data process config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_l',
+        'trans_type': 'yolo_l',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -179,7 +179,7 @@ yolov5_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'yolov8',
+        'trainer_type': 'yolo',
     },
 
     'yolov5_x':{
@@ -211,10 +211,10 @@ yolov5_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
+        # ---------------- Data process config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_x',
+        'trans_type': 'yolo_x',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -224,235 +224,6 @@ yolov5_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'yolov8',
+        'trainer_type': 'yolo',
     },
 }
-
-
-yolov5_adamw_cfg = {
-    'yolov5_n_adamw':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 0.25,
-        'depth': 0.34,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolov5_pafpn',
-        'fpn_reduce_layer': 'Conv',
-        'fpn_downsample_layer': 'Conv',
-        'fpn_core_block': 'CSPBlock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                        [30, 61],   [62, 45],   [59, 119],    # P4
-                        [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_n',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'anchor_thresh': 4.0,
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
-    'yolov5_s_adamw':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 0.50,
-        'depth': 0.34,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolov5_pafpn',
-        'fpn_reduce_layer': 'Conv',
-        'fpn_downsample_layer': 'Conv',
-        'fpn_core_block': 'CSPBlock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                        [30, 61],   [62, 45],   [59, 119],    # P4
-                        [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_s',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'anchor_thresh': 4.0,
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
-    'yolov5_m_adamw':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 0.75,
-        'depth': 0.67,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolov5_pafpn',
-        'fpn_reduce_layer': 'Conv',
-        'fpn_downsample_layer': 'Conv',
-        'fpn_core_block': 'CSPBlock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                        [30, 61],   [62, 45],   [59, 119],    # P4
-                        [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_m',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'anchor_thresh': 4.0,
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
-    'yolov5_l_adamw':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 1.0,
-        'depth': 1.0,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolov5_pafpn',
-        'fpn_reduce_layer': 'Conv',
-        'fpn_downsample_layer': 'Conv',
-        'fpn_core_block': 'CSPBlock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                        [30, 61],   [62, 45],   [59, 119],    # P4
-                        [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_l',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'anchor_thresh': 4.0,
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
-    'yolov5_x_adamw':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 1.25,
-        'depth': 1.34,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolov5_pafpn',
-        'fpn_reduce_layer': 'Conv',
-        'fpn_downsample_layer': 'Conv',
-        'fpn_core_block': 'CSPBlock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                        [30, 61],   [62, 45],   [59, 119],    # P4
-                        [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_x',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'anchor_thresh': 4.0,
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
-}

+ 13 - 16
config/model_config/yolov7_config.py

@@ -5,7 +5,7 @@ yolov7_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'backbone': 'elannet_tiny',
-        'pretrained': True,
+        'pretrained': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -13,11 +13,11 @@ yolov7_cfg = {
         'max_stride': 32,
         ## Neck
         'neck': 'csp_sppf',
-        'expand_ratio': 0.5,
-        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
         ## FPN
         'fpn': 'yolov7_pafpn',
         'fpn_act': 'silu',
@@ -33,8 +33,7 @@ yolov7_cfg = {
         'num_cls_head': 2,
         'num_reg_head': 2,
         'head_depthwise': False,
-        # ---------------- Train config ----------------
-        ## input
+        # ---------------- Data process config ----------------
         'trans_type': 'yolox_s',
         'multi_scale': [0.5, 1.5], # 320 -> 960
         # ---------------- Assignment config ----------------
@@ -47,7 +46,7 @@ yolov7_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov7':{
@@ -62,11 +61,11 @@ yolov7_cfg = {
         'max_stride': 32,
         # neck
         'neck': 'csp_sppf',
-        'expand_ratio': 0.5,
-        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
         # fpn
         'fpn': 'yolov7_pafpn',
         'fpn_act': 'silu',
@@ -82,8 +81,7 @@ yolov7_cfg = {
         'num_cls_head': 2,
         'num_reg_head': 2,
         'head_depthwise': False,
-        # ---------------- Train config ----------------
-        ## input
+        # ---------------- Data process config ----------------
         'trans_type': 'yolox_l',
         'multi_scale': [0.5, 1.25], # 320 -> 800
         # ---------------- Assignment config ----------------
@@ -96,7 +94,7 @@ yolov7_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov7_x':{
@@ -111,11 +109,11 @@ yolov7_cfg = {
         'max_stride': 32,
         ## Neck
         'neck': 'csp_sppf',
-        'expand_ratio': 0.5,
-        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
         ## FPN
         'fpn': 'yolov7_pafpn',
         'fpn_act': 'silu',
@@ -131,8 +129,7 @@ yolov7_cfg = {
         'num_cls_head': 2,
         'num_reg_head': 2,
         'head_depthwise': False,
-        # ---------------- Train config ----------------
-        ## input
+        # ---------------- Data process config ----------------
         'trans_type': 'yolox_x',
         'multi_scale': [0.5, 1.5], # 320 -> 960
         # ---------------- Assignment config ----------------
@@ -145,7 +142,7 @@ yolov7_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
 }

+ 10 - 10
config/model_config/yolov8_config.py

@@ -37,7 +37,7 @@ yolov8_cfg = {
         # ---------------- Train config ----------------
         ## Input
         'multi_scale': [0.5, 1.5], # 320 -> 960
-        'trans_type': 'yolov5_n',
+        'trans_type': 'yolo_n',
         # ---------------- Assignment config ----------------
         ## Matcher
         'matcher': "tal",
@@ -49,7 +49,7 @@ yolov8_cfg = {
         'loss_box_weight': 7.5,
         'loss_dfl_weight': 1.5,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov8_s':{
@@ -87,7 +87,7 @@ yolov8_cfg = {
         # ---------------- Train config ----------------
         ## Input
         'multi_scale': [0.5, 1.5], # 320 -> 960
-        'trans_type': 'yolov5_s',
+        'trans_type': 'yolo_s',
         # ---------------- Assignment config ----------------
         ## Matcher
         'matcher': "tal",
@@ -99,7 +99,7 @@ yolov8_cfg = {
         'loss_box_weight': 7.5,
         'loss_dfl_weight': 1.5,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov8_m':{
@@ -137,7 +137,7 @@ yolov8_cfg = {
         # ---------------- Train config ----------------
         ## Input
         'multi_scale': [0.5, 1.5], # 320 -> 960
-        'trans_type': 'yolov5_m',
+        'trans_type': 'yolo_m',
         # ---------------- Assignment config ----------------
         ## Matcher
         'matcher': "tal",
@@ -149,7 +149,7 @@ yolov8_cfg = {
         'loss_box_weight': 7.5,
         'loss_dfl_weight': 1.5,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov8_l':{
@@ -187,7 +187,7 @@ yolov8_cfg = {
         # ---------------- Train config ----------------
         ## Input
         'multi_scale': [0.5, 1.5], # 320 -> 960
-        'trans_type': 'yolov5_l',
+        'trans_type': 'yolo_l',
         # ---------------- Assignment config ----------------
         ## Matcher
         'matcher': "tal",
@@ -199,7 +199,7 @@ yolov8_cfg = {
         'loss_box_weight': 7.5,
         'loss_dfl_weight': 1.5,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov8_x':{
@@ -237,7 +237,7 @@ yolov8_cfg = {
         # ---------------- Train config ----------------
         ## Input
         'multi_scale': [0.5, 1.5], # 320 -> 960
-        'trans_type': 'yolov5_x',
+        'trans_type': 'yolo_x',
         # ---------------- Assignment config ----------------
         ## Matcher
         'matcher': "tal",
@@ -249,7 +249,7 @@ yolov8_cfg = {
         'loss_box_weight': 7.5,
         'loss_dfl_weight': 1.5,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
 }

+ 12 - 28
dataset/build.py

@@ -2,28 +2,25 @@ import os
 
 try:
     # dataset class
-    from .voc import VOCDataset
-    from .coco import COCODataset
+    from .voc        import VOCDataset
+    from .coco       import COCODataset
     from .crowdhuman import CrowdHumanDataset
-    from .widerface import WiderFaceDataset
-    from .customed import CustomedDataset
+    from .widerface  import WiderFaceDataset
+    from .customed   import CustomedDataset
     # transform class
-    from .data_augment.ssd_augment import SSDAugmentation, SSDBaseTransform
+    from .data_augment.ssd_augment    import SSDAugmentation, SSDBaseTransform
     from .data_augment.yolov5_augment import YOLOv5Augmentation, YOLOv5BaseTransform
-    from .data_augment.rtdetr_augment import RTDetrAugmentation, RTDetrBaseTransform
 
 except:
     # dataset class
-    from voc import VOCDataset
-    from coco import COCODataset
+    from voc        import VOCDataset
+    from coco       import COCODataset
     from crowdhuman import CrowdHumanDataset
-    from widerface import WiderFaceDataset
-    from customed import CustomedDataset
+    from widerface  import WiderFaceDataset
+    from customed   import CustomedDataset
     # transform class
-    from data_augment.ssd_augment import SSDAugmentation, SSDBaseTransform
+    from data_augment.ssd_augment    import SSDAugmentation, SSDBaseTransform
     from data_augment.yolov5_augment import YOLOv5Augmentation, YOLOv5BaseTransform
-    from data_augment.rtdetr_augment import RTDetrAugmentation, RTDetrBaseTransform
-
 
 # ------------------------------ Dataset ------------------------------
 def build_dataset(args, data_cfg, trans_config, transform, is_train=False):
@@ -48,7 +45,6 @@ def build_dataset(args, data_cfg, trans_config, transform, is_train=False):
                              transform    = transform,
                              trans_config = trans_config,
                              is_train     = is_train,
-                             load_cache   = args.load_cache
                              )
     ## COCO dataset
     elif args.dataset == 'coco':
@@ -59,7 +55,6 @@ def build_dataset(args, data_cfg, trans_config, transform, is_train=False):
                               transform    = transform,
                               trans_config = trans_config,
                               is_train     = is_train,
-                              load_cache   = args.load_cache
                               )
     ## CrowdHuman dataset
     elif args.dataset == 'crowdhuman':
@@ -89,8 +84,7 @@ def build_dataset(args, data_cfg, trans_config, transform, is_train=False):
                                   image_set    = image_set,
                                   transform    = transform,
                                   trans_config = trans_config,
-                                  is_train      = is_train,
-                                  load_cache    = args.load_cache
+                                  is_train     = is_train,
                                   )
 
     return dataset, dataset_info
@@ -115,20 +109,10 @@ def build_transform(args, trans_config, max_stride=32, is_train=False):
         else:
             transform = SSDBaseTransform(args.img_size)
     ## YOLO style transform
-    elif trans_config['aug_type'] == 'yolov5':
+    elif trans_config['aug_type'] == 'yolo':
         if is_train:
             transform = YOLOv5Augmentation(args.img_size, trans_config['affine_params'], trans_config['use_ablu'])
         else:
             transform = YOLOv5BaseTransform(args.img_size, max_stride)
-    ## RT-DETR style transform
-    elif trans_config['aug_type'] == 'rtdetr':
-        if is_train:
-            transform = RTDetrAugmentation(
-                args.img_size, trans_config['pixel_mean'], trans_config['pixel_std'])
-            if trans_config["mosaic_prob"] > 0:
-                transform.reset_weak_augment()
-        else:
-            transform = RTDetrBaseTransform(
-                args.img_size, trans_config['pixel_mean'], trans_config['pixel_std'])
 
     return transform, trans_config
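
With the RT-DETR branch removed, build_transform dispatches only on the 'ssd' and 'yolo' values of aug_type. A condensed sketch of the remaining logic (the SSD training-branch signature is not shown in this diff and is assumed; the explicit else is a defensive addition, not present in the original):

from dataset.data_augment.ssd_augment    import SSDAugmentation, SSDBaseTransform
from dataset.data_augment.yolov5_augment import YOLOv5Augmentation, YOLOv5BaseTransform

def build_transform_sketch(args, trans_config, max_stride=32, is_train=False):
    if trans_config['aug_type'] == 'ssd':
        transform = (SSDAugmentation(args.img_size) if is_train   # signature assumed
                     else SSDBaseTransform(args.img_size))
    elif trans_config['aug_type'] == 'yolo':
        transform = (YOLOv5Augmentation(args.img_size,
                                        trans_config['affine_params'],
                                        trans_config['use_ablu']) if is_train
                     else YOLOv5BaseTransform(args.img_size, max_stride))
    else:
        raise NotImplementedError(trans_config['aug_type'])   # defensive addition
    return transform, trans_config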

+ 16 - 69
dataset/coco.py

@@ -3,12 +3,9 @@ import cv2
 import time
 import random
 import numpy as np
-from torch.utils.data import Dataset
 
-try:
-    from pycocotools.coco import COCO
-except:
-    print("It seems that the COCOAPI is not installed.")
+from torch.utils.data import Dataset
+from pycocotools.coco import COCO
 
 try:
     from .data_augment.strong_augment import MosaicAugment, MixupAugment
@@ -28,7 +25,6 @@ class COCODataset(Dataset):
                  trans_config = None,
                  transform    = None,
                  is_train     :bool =False,
-                 load_cache   :bool = False,
                  ):
         # ----------- Basic parameters -----------
         self.img_size = img_size
@@ -56,8 +52,8 @@ class COCODataset(Dataset):
         if is_train:
             self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
             self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
-            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
-            self.mixup_augment  = MixupAugment(img_size, trans_config)
+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train) if self.mosaic_prob > 0. else None
+            self.mixup_augment  = MixupAugment(img_size, trans_config)            if self.mixup_prob > 0.  else None
         else:
             self.mosaic_prob = 0.0
             self.mixup_prob  = 0.0
@@ -66,12 +62,6 @@ class COCODataset(Dataset):
         print('==============================')
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
-        print('==============================')
-        # ----------- Cached data -----------
-        self.load_cache = load_cache
-        self.cached_datas = None
-        if self.load_cache:
-            self.cached_datas = self._load_cache()
 
     # ------------ Basic dataset function ------------
     def __len__(self):
@@ -80,38 +70,6 @@ class COCODataset(Dataset):
     def __getitem__(self, index):
         return self.pull_item(index)
 
-    def _load_cache(self):
-        data_items = []
-        for idx in range(self.dataset_size):
-            if idx % 2000 == 0:
-                print("Caching images and targets : {} / {} ...".format(idx, self.dataset_size))
-
-            # load a data
-            image, target = self.load_image_target(idx)
-            orig_h, orig_w, _ = image.shape
-
-            # resize image
-            r = self.img_size / max(orig_h, orig_w)
-            if r != 1: 
-                interp = cv2.INTER_LINEAR
-                new_size = (int(orig_w * r), int(orig_h * r))
-                image = cv2.resize(image, new_size, interpolation=interp)
-            img_h, img_w = image.shape[:2]
-
-            # rescale bbox
-            boxes = target["boxes"].copy()
-            boxes[:, [0, 2]] = boxes[:, [0, 2]] / orig_w * img_w
-            boxes[:, [1, 3]] = boxes[:, [1, 3]] / orig_h * img_h
-            target["boxes"] = boxes
-
-            dict_item = {}
-            dict_item["image"] = image
-            dict_item["target"] = target
-
-            data_items.append(dict_item)
-        
-        return data_items
-
     # ------------ Mosaic & Mixup ------------
     def load_mosaic(self, index):
         # ------------ Prepare 4 indexes of images ------------
@@ -150,25 +108,17 @@ class COCODataset(Dataset):
     
     # ------------ Load data function ------------
     def load_image_target(self, index):
-        # == Load a data from the cached data ==
-        if self.cached_datas is not None:
-            # load a data
-            data_item = self.cached_datas[index]
-            image = data_item["image"]
-            target = data_item["target"]
-        # == Load a data from the local disk ==
-        else:        
-            # load an image
-            image, _ = self.pull_image(index)
-            height, width, channels = image.shape
-
-            # load a target
-            bboxes, labels = self.pull_anno(index)
-            target = {
-                "boxes": bboxes,
-                "labels": labels,
-                "orig_size": [height, width]
-            }
+        # load an image
+        image, _ = self.pull_image(index)
+        height, width, channels = image.shape
+
+        # load a target
+        bboxes, labels = self.pull_anno(index)
+        target = {
+            "boxes": bboxes,
+            "labels": labels,
+            "orig_size": [height, width]
+        }
 
         return image, target
 
@@ -256,7 +206,7 @@ if __name__ == "__main__":
     parser.add_argument('-size', '--img_size', default=640, type=int,
                         help='input image size.')
     parser.add_argument('--aug_type', type=str, default='ssd',
-                        help='augmentation type: ssd, yolov5, rtdetr.')
+                        help='augmentation type: ssd, yolo.')
     parser.add_argument('--mosaic', default=0., type=float,
                         help='mosaic augmentation.')
     parser.add_argument('--mixup', default=0., type=float,
@@ -265,8 +215,6 @@ if __name__ == "__main__":
                         help='mixup augmentation.')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='mixup augmentation.')
-    parser.add_argument('--load_cache', action="store_true", default=False,
-                        help='load cached data.')
     
     args = parser.parse_args()
 
@@ -306,7 +254,6 @@ if __name__ == "__main__":
         trans_config=trans_config,
         transform=transform,
         is_train=args.is_train,
-        load_cache=args.load_cache
         )
     
     np.random.seed(0)
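
The same guard appears here and in CrowdHumanDataset and CustomedDataset below: the Mosaic/Mixup helpers are only constructed when their probability is nonzero, so eval-mode datasets skip that setup entirely. A minimal reproduction of the pattern (constructor internals of the augmenters are assumed):

from dataset.data_augment.strong_augment import MosaicAugment, MixupAugment

class LazyAugInit:
    def __init__(self, img_size, trans_config, is_train):
        if is_train:
            self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
            self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
            # only pay for the augmenters that can actually fire:
            self.mosaic_augment = (MosaicAugment(img_size, trans_config, is_train)
                                   if self.mosaic_prob > 0. else None)
            self.mixup_augment  = (MixupAugment(img_size, trans_config)
                                   if self.mixup_prob > 0. else None)
        else:
            self.mosaic_prob = self.mixup_prob = 0.0
            self.mosaic_augment = self.mixup_augment = None

Callers are then expected to gate on the probability (which implies a non-None augmenter) before invoking either one.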

+ 4 - 7
dataset/crowdhuman.py

@@ -47,8 +47,8 @@ class CrowdHumanDataset(Dataset):
         if is_train:
             self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
             self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
-            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
-            self.mixup_augment  = MixupAugment(img_size, trans_config)
+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train) if self.mosaic_prob > 0. else None
+            self.mixup_augment  = MixupAugment(img_size, trans_config)            if self.mixup_prob > 0.  else None
         else:
             self.mosaic_prob = 0.0
             self.mixup_prob  = 0.0
@@ -57,7 +57,6 @@ class CrowdHumanDataset(Dataset):
         print('==============================')
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
-        print('==============================')
 
     # ------------ Basic dataset function ------------
     def __len__(self):
@@ -200,8 +199,6 @@ if __name__ == "__main__":
                         help='mosaic augmentation.')
     parser.add_argument('--mixup', default=0., type=float,
                         help='mixup augmentation.')
-    parser.add_argument('--mixup_type', type=str, default='yolov5_mixup',
-                        help='mixup augmentation.')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='mixup augmentation.')
 
@@ -224,8 +221,8 @@ if __name__ == "__main__":
         # Mosaic & Mixup
         'mosaic_prob': args.mosaic,
         'mixup_prob': args.mixup,
-        'mosaic_type': 'yolov5_mosaic',
-        'mixup_type': args.mixup_type,   # optional: yolov5_mixup, yolox_mixup
+        'mosaic_type': 'yolov5',
+        'mixup_type':  'yolov5',   # optional: yolov5, yolox
         'mosaic_keep_ratio': False,
         'mixup_scale': [0.5, 1.5]
     }

+ 15 - 66
dataset/customed.py

@@ -25,7 +25,6 @@ class CustomedDataset(Dataset):
                  transform          = None,
                  trans_config       = None,
                  is_train     :bool =False,
-                 load_cache   :bool = False,
                  ):
         # ----------- Basic parameters -----------
         self.img_size = img_size
@@ -46,8 +45,8 @@ class CustomedDataset(Dataset):
         if is_train:
             self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
             self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
-            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
-            self.mixup_augment  = MixupAugment(img_size, trans_config)
+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train) if self.mosaic_prob > 0. else None
+            self.mixup_augment  = MixupAugment(img_size, trans_config)            if self.mixup_prob > 0.  else None
         else:
             self.mosaic_prob = 0.0
             self.mixup_prob  = 0.0
@@ -58,13 +57,6 @@ class CustomedDataset(Dataset):
         print('Json file: {}'.format(self.json_file))
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
-        print('==============================')
-        # ----------- Cached data -----------
-        self.load_cache = load_cache
-        self.cached_datas = None
-        if self.load_cache:
-            self.cached_datas = self._load_cache()
-
 
     # ------------ Basic dataset function ------------
     def __len__(self):
@@ -73,38 +65,6 @@ class CustomedDataset(Dataset):
     def __getitem__(self, index):
         return self.pull_item(index)
 
-    def _load_cache(self):
-        data_items = []
-        for idx in range(self.dataset_size):
-            if idx % 2000 == 0:
-                print("Caching images and targets : {} / {} ...".format(idx, self.dataset_size))
-
-            # load a data
-            image, target = self.load_image_target(idx)
-            orig_h, orig_w, _ = image.shape
-
-            # resize image
-            r = self.img_size / max(orig_h, orig_w)
-            if r != 1: 
-                interp = cv2.INTER_LINEAR
-                new_size = (int(orig_w * r), int(orig_h * r))
-                image = cv2.resize(image, new_size, interpolation=interp)
-            img_h, img_w = image.shape[:2]
-
-            # rescale bbox
-            boxes = target["boxes"].copy()
-            boxes[:, [0, 2]] = boxes[:, [0, 2]] / orig_w * img_w
-            boxes[:, [1, 3]] = boxes[:, [1, 3]] / orig_h * img_h
-            target["boxes"] = boxes
-
-            dict_item = {}
-            dict_item["image"] = image
-            dict_item["target"] = target
-
-            data_items.append(dict_item)
-        
-        return data_items
-
     # ------------ Mosaic & Mixup ------------
     def load_mosaic(self, index):
         # ------------ Prepare 4 indexes of images ------------
@@ -143,25 +103,17 @@ class CustomedDataset(Dataset):
     
     # ------------ Load data function ------------
     def load_image_target(self, index):
-        # == Load a data from the cached data ==
-        if self.cached_datas is not None:
-            # load a data
-            data_item = self.cached_datas[index]
-            image = data_item["image"]
-            target = data_item["target"]
-        # == Load a data from the local disk ==
-        else:        
-            # load an image
-            image, _ = self.pull_image(index)
-            height, width, channels = image.shape
-
-            # load a target
-            bboxes, labels = self.pull_anno(index)
-            target = {
-                "boxes": bboxes,
-                "labels": labels,
-                "orig_size": [height, width]
-            }
+        # load an image
+        image, _ = self.pull_image(index)
+        height, width, channels = image.shape
+
+        # load a target
+        bboxes, labels = self.pull_anno(index)
+        target = {
+            "boxes": bboxes,
+            "labels": labels,
+            "orig_size": [height, width]
+        }
 
         return image, target
 
@@ -257,8 +209,6 @@ if __name__ == "__main__":
                         help='mixup augmentation.')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='mixup augmentation.')
-    parser.add_argument('--load_cache', action="store_true", default=False,
-                        help='load cached data.')
     
     args = parser.parse_args()
 
@@ -279,9 +229,8 @@ if __name__ == "__main__":
         # Mosaic & Mixup
         'mosaic_prob': args.mosaic,
         'mixup_prob': args.mixup,
-        'mosaic_type': 'yolov5_mosaic',
-        'mixup_type': args.mixup_type,   # optional: yolov5_mixup, yolox_mixup
-        'mosaic_keep_ratio': False,
+        'mosaic_type': 'yolov5',
+        'mixup_type':  'yolov5',
         'mixup_scale': [0.5, 1.5]
     }
     transform, trans_cfg = build_transform(args, trans_config, 32, args.is_train)

+ 0 - 471
dataset/data_augment/rtdetr_augment.py

@@ -1,471 +0,0 @@
-# ------------------------------------------------------------
-# Data preprocessor for Real-time DETR
-# ------------------------------------------------------------
-import cv2
-import numpy as np
-from numpy import random
-
-import torch
-import torch.nn.functional as F
-
-
-# ------------------------- Augmentations -------------------------
-class Compose(object):
-    """Composes several augmentations together.
-    Args:
-        transforms (List[Transform]): list of transforms to compose.
-    Example:
-        >>> augmentations.Compose([
-        >>>     transforms.CenterCrop(10),
-        >>>     transforms.ToTensor(),
-        >>> ])
-    """
-
-    def __init__(self, transforms):
-        self.transforms = transforms
-
-    def __call__(self, image, target=None):
-        for t in self.transforms:
-            image, target = t(image, target)
-        return image, target
-
-## Convert color format
-class ConvertColorFormat(object):
-    def __init__(self, color_format='rgb'):
-        self.color_format = color_format
-
-    def __call__(self, image, target=None):
-        """
-        Input:
-            image: (np.array) a OpenCV image with BGR color format.
-            target: None
-        Output:
-            image: (np.array) a OpenCV image with given color format.
-            target: None
-        """
-        # Convert color format
-        if self.color_format == 'rgb':
-            image = image[..., (2, 1, 0)]    # BGR -> RGB
-        elif self.color_format == 'bgr':
-            image = image
-        else:
-            raise NotImplementedError("Unknown color format: <{}>".format(self.color_format))
-
-        return image, target
-
-## Random Photometric Distort
-class RandomPhotometricDistort(object):
-    """
-    Distort image w.r.t hue, saturation and exposure.
-    """
-
-    def __init__(self, hue=0.1, saturation=1.5, exposure=1.5):
-        super().__init__()
-        self.hue = hue
-        self.saturation = saturation
-        self.exposure = exposure
-
-    def __call__(self, image: np.ndarray, target=None) -> np.ndarray:
-        """
-        Args:
-            img (ndarray): of shape HxW, HxWxC, or NxHxWxC. The array can be
-                of type uint8 in range [0, 255], or floating point in range
-                [0, 1] or [0, 255].
-
-        Returns:
-            ndarray: the distorted image(s).
-        """
-        if random.random() < 0.5:
-            dhue = np.random.uniform(low=-self.hue, high=self.hue)
-            dsat = self._rand_scale(self.saturation)
-            dexp = self._rand_scale(self.exposure)
-
-            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
-            image = np.asarray(image, dtype=np.float32) / 255.
-            image[:, :, 1] *= dsat
-            image[:, :, 2] *= dexp
-            H = image[:, :, 0] + dhue * 179 / 255.
-
-            if dhue > 0:
-                H[H > 1.0] -= 1.0
-            else:
-                H[H < 0.0] += 1.0
-
-            image[:, :, 0] = H
-            image = (image * 255).clip(0, 255).astype(np.uint8)
-            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
-            image = np.asarray(image, dtype=np.uint8)
-
-        return image, target
-
-    def _rand_scale(self, upper_bound):
-        """
-        Calculate random scaling factor.
-
-        Args:
-            upper_bound (float): range of the random scale.
-        Returns:
-            random scaling factor (float) whose range is
-            from 1 / s to s .
-        """
-        scale = np.random.uniform(low=1, high=upper_bound)
-        if np.random.rand() > 0.5:
-            return scale
-        return 1 / scale
-
-## Random scaling
-class RandomExpand(object):
-    def __init__(self, fill_value) -> None:
-        self.fill_value = fill_value
-
-    def __call__(self, image, target=None):
-        if random.randint(2):
-            return image, target
-
-        height, width, channels = image.shape
-        ratio = random.uniform(1, 4)
-        left = random.uniform(0, width*ratio - width)
-        top = random.uniform(0, height*ratio - height)
-
-        expand_image = np.ones(
-            (int(height*ratio), int(width*ratio), channels),
-            dtype=image.dtype) * self.fill_value
-        expand_image[int(top):int(top + height),
-                     int(left):int(left + width)] = image
-        image = expand_image
-
-        boxes = target['boxes'].copy()
-        boxes[:, :2] += (int(left), int(top))
-        boxes[:, 2:] += (int(left), int(top))
-        target['boxes'] = boxes
-
-        return image, target
-
-## Random IoU based Sample Crop
-class RandomSampleCrop(object):
-    def __init__(self):
-        self.sample_options = (
-            # using entire original input image
-            None,
-            # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.5,.7,.9
-            (0.1, None),
-            (0.3, None),
-            (0.5, None),
-            (0.7, None),
-            (0.9, None),
-            # randomly sample a patch
-            (None, None),
-        )
-
-    def intersect(self, box_a, box_b):
-        max_xy = np.minimum(box_a[:, 2:], box_b[2:])
-        min_xy = np.maximum(box_a[:, :2], box_b[:2])
-        inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
-
-        return inter[:, 0] * inter[:, 1]
-
-    def compute_iou(self, box_a, box_b):
-        inter = self.intersect(box_a, box_b)
-        area_a = ((box_a[:, 2]-box_a[:, 0]) *
-                (box_a[:, 3]-box_a[:, 1]))  # [A,B]
-        area_b = ((box_b[2]-box_b[0]) *
-                (box_b[3]-box_b[1]))  # [A,B]
-        union = area_a + area_b - inter
-        return inter / union  # [A,B]
-
-    def __call__(self, image, target=None):
-        height, width, _ = image.shape
-
-        # check target
-        if len(target["boxes"]) == 0:
-            return image, target
-
-        while True:
-            # randomly choose a mode
-            sample_id = np.random.randint(len(self.sample_options))
-            mode = self.sample_options[sample_id]
-            if mode is None:
-                return image, target
-
-            boxes = target["boxes"]
-            labels = target["labels"]
-
-            min_iou, max_iou = mode
-            if min_iou is None:
-                min_iou = float('-inf')
-            if max_iou is None:
-                max_iou = float('inf')
-
-            # max trials (50)
-            for _ in range(50):
-                current_image = image
-
-                w = random.uniform(0.3 * width, width)
-                h = random.uniform(0.3 * height, height)
-
-                # aspect ratio constraint b/t .5 & 2
-                if h / w < 0.5 or h / w > 2:
-                    continue
-
-                left = random.uniform(width - w)
-                top = random.uniform(height - h)
-
-                # convert to integer rect x1,y1,x2,y2
-                rect = np.array([int(left), int(top), int(left+w), int(top+h)])
-
-                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
-                overlap = self.compute_iou(boxes, rect)
-
-                # is min and max overlap constraint satisfied? if not try again
-                if overlap.min() < min_iou and max_iou < overlap.max():
-                    continue
-
-                # cut the crop from the image
-                current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
-                                              :]
-
-                # keep overlap with gt box IF center in sampled patch
-                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
-
-                # mask in all gt boxes that are above and to the left of the centers
-                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
-
-                # mask in all gt boxes that are below and to the right of the centers
-                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
-
-                # mask in that both m1 and m2 are true
-                mask = m1 * m2
-
-                # have any valid boxes? try again if not
-                if not mask.any():
-                    continue
-
-                # take only matching gt boxes
-                current_boxes = boxes[mask, :].copy()
-
-                # take only matching gt labels
-                current_labels = labels[mask]
-
-                # should we use the box left and top corner or the crop's
-                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
-                                                  rect[:2])
-                # adjust to crop (by subtracting crop's left,top)
-                current_boxes[:, :2] -= rect[:2]
-
-                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
-                                                  rect[2:])
-                # adjust to crop (by subtracting crop's left,top)
-                current_boxes[:, 2:] -= rect[:2]
-
-                # update target
-                target["boxes"] = current_boxes
-                target["labels"] = current_labels
-
-                return current_image, target
-
-## Random JitterCrop
-class RandomJitterCrop(object):
-    """Jitter and crop the image and box."""
-    def __init__(self, fill_value, p=0.5, jitter_ratio=0.3):
-        super().__init__()
-        self.p = p
-        self.jitter_ratio = jitter_ratio
-        self.fill_value = fill_value
-
-    def crop(self, image, pleft, pright, ptop, pbot, output_size):
-        oh, ow = image.shape[:2]
-
-        swidth, sheight = output_size
-
-        src_rect = [pleft, ptop, swidth + pleft,
-                    sheight + ptop]  # x1,y1,x2,y2
-        img_rect = [0, 0, ow, oh]
-        # rect intersection
-        new_src_rect = [max(src_rect[0], img_rect[0]),
-                        max(src_rect[1], img_rect[1]),
-                        min(src_rect[2], img_rect[2]),
-                        min(src_rect[3], img_rect[3])]
-        dst_rect = [max(0, -pleft),
-                    max(0, -ptop),
-                    max(0, -pleft) + new_src_rect[2] - new_src_rect[0],
-                    max(0, -ptop) + new_src_rect[3] - new_src_rect[1]]
-
-        # crop the image
-        cropped = np.ones([sheight, swidth, 3], dtype=image.dtype) * self.fill_value
-        # cropped[:, :, ] = np.mean(image, axis=(0, 1))
-        cropped[dst_rect[1]:dst_rect[3], dst_rect[0]:dst_rect[2]] = \
-            image[new_src_rect[1]:new_src_rect[3],
-            new_src_rect[0]:new_src_rect[2]]
-
-        return cropped
-
-    def __call__(self, image, target=None):
-        if random.random() > self.p:
-            return image, target
-        else:
-            oh, ow = image.shape[:2]
-            dw = int(ow * self.jitter_ratio)
-            dh = int(oh * self.jitter_ratio)
-            pleft = np.random.randint(-dw, dw)
-            pright = np.random.randint(-dw, dw)
-            ptop = np.random.randint(-dh, dh)
-            pbot = np.random.randint(-dh, dh)
-
-            swidth = ow - pleft - pright
-            sheight = oh - ptop - pbot
-            output_size = (swidth, sheight)
-            # crop image
-            cropped_image = self.crop(image=image,
-                                    pleft=pleft, 
-                                    pright=pright, 
-                                    ptop=ptop, 
-                                    pbot=pbot,
-                                    output_size=output_size)
-            # crop bbox
-            if target is not None:
-                bboxes = target['boxes'].copy()
-                coords_offset = np.array([pleft, ptop], dtype=np.float32)
-                bboxes[..., [0, 2]] = bboxes[..., [0, 2]] - coords_offset[0]
-                bboxes[..., [1, 3]] = bboxes[..., [1, 3]] - coords_offset[1]
-                swidth, sheight = output_size
-
-                bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], 0, swidth - 1)
-                bboxes[..., [1, 3]] = np.clip(bboxes[..., [1, 3]], 0, sheight - 1)
-                target['boxes'] = bboxes
-
-            return cropped_image, target
-    
-## Random HFlip
-class RandomHorizontalFlip(object):
-    def __init__(self, p=0.5):
-        self.p = p
-
-    def __call__(self, image, target=None):
-        if random.random() < self.p:
-            orig_h, orig_w = image.shape[:2]
-            image = image[:, ::-1]
-            if target is not None:
-                if "boxes" in target:
-                    boxes = target["boxes"].copy()
-                    boxes[..., [0, 2]] = orig_w - boxes[..., [2, 0]]
-                    target["boxes"] = boxes
-
-        return image, target
-
-## Resize tensor image
-class Resize(object):
-    def __init__(self, img_size=640):
-        self.img_size = img_size
-
-    def __call__(self, image, target=None):
-        orig_h, orig_w = image.shape[:2]
-
-        # resize
-        image = cv2.resize(image, (self.img_size, self.img_size)).astype(np.float32)
-        img_h, img_w = image.shape[:2]
-
-        # rescale bboxes
-        if target is not None:
-            boxes = target["boxes"]
-            boxes[:, [0, 2]] = boxes[:, [0, 2]] / orig_w * img_w
-            boxes[:, [1, 3]] = boxes[:, [1, 3]] / orig_h * img_h
-            target["boxes"] = boxes
-
-        return image, target
-
-## Normalize tensor image
-class Normalize(object):
-    def __init__(self, pixel_mean, pixel_std):
-        self.pixel_mean = pixel_mean
-        self.pixel_std = pixel_std
-
-    def __call__(self, image, target=None):
-        # normalize image
-        image = (image - self.pixel_mean) / self.pixel_std
-
-        return image, target
-
-## Convert ndarray to torch.Tensor
-class ToTensor(object):
-    def __call__(self, image, target=None):        
-        # Convert torch.Tensor
-        image = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
-
-        if target is not None:
-            target["boxes"] = torch.as_tensor(target["boxes"]).float()
-            target["labels"] = torch.as_tensor(target["labels"]).long()
-
-        return image, target
-
-
-# ------------------------- Preprocessors -------------------------
-## Transform for Train
-class RTDetrAugmentation(object):
-    def __init__(self, img_size=640, pixel_mean=[123.675, 116.28, 103.53], pixel_std=[58.395, 57.12, 57.375]):
-        # ----------------- Basic parameters -----------------
-        self.img_size = img_size
-        self.pixel_mean = pixel_mean  # RGB format
-        self.pixel_std = pixel_std    # RGB format
-        self.color_format = 'rgb'
-        print("================= Pixel Statistics =================")
-        print("Pixel mean: {}".format(self.pixel_mean))
-        print("Pixel std:  {}".format(self.pixel_std))
-
-        # ----------------- Transforms -----------------
-        self.augment = Compose([
-            RandomPhotometricDistort(hue=0.5, saturation=1.5, exposure=1.5),
-            RandomJitterCrop(p=0.8, jitter_ratio=0.3, fill_value=self.pixel_mean[::-1]),
-            RandomHorizontalFlip(p=0.5),
-            Resize(img_size=self.img_size),
-            ConvertColorFormat(self.color_format),
-            Normalize(self.pixel_mean, self.pixel_std),
-            ToTensor()
-        ])
-
-    def reset_weak_augment(self):
-        print("Reset transform with weak augmentation ...")
-        self.augment = Compose([
-            RandomHorizontalFlip(p=0.5),
-            Resize(img_size=self.img_size),
-            ConvertColorFormat(self.color_format),
-            Normalize(self.pixel_mean, self.pixel_std),
-            ToTensor()
-        ])
-
-
-    def __call__(self, image, target, mosaic=False):
-        orig_h, orig_w = image.shape[:2]
-        ratio = [self.img_size / orig_w, self.img_size / orig_h]
-
-        image, target = self.augment(image, target)
-
-        return image, target, ratio
-
-## Transform for Eval
-class RTDetrBaseTransform(object):
-    def __init__(self, img_size=640, pixel_mean=[123.675, 116.28, 103.53], pixel_std=[58.395, 57.12, 57.375]):
-        # ----------------- Basic parameters -----------------
-        self.img_size = img_size
-        self.pixel_mean = pixel_mean  # RGB format
-        self.pixel_std = pixel_std    # RGB format
-        self.color_format = 'rgb'
-        print("================= Pixel Statistics =================")
-        print("Pixel mean: {}".format(self.pixel_mean))
-        print("Pixel std:  {}".format(self.pixel_std))
-
-        # ----------------- Transforms -----------------
-        self.transform = Compose([
-            Resize(img_size=self.img_size),
-            ConvertColorFormat(self.color_format),
-            Normalize(self.pixel_mean, self.pixel_std),
-            ToTensor()
-        ])
-
-
-    def __call__(self, image, target=None, mosaic=False):
-        orig_h, orig_w = image.shape[:2]
-        ratio = [self.img_size / orig_w, self.img_size / orig_h]
-
-        image, target = self.transform(image, target)
-
-        return image, target, ratio
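
For the record, the deleted preprocessors all shared one contract: each transform maps (image, target) to (image, target), and the top-level wrappers additionally return the per-axis resize ratio. A sketch of the former call pattern with dummy data; the actual call is left commented out since the module no longer exists after this commit:

import numpy as np

# Dummy BGR image and a one-box target standing in for a real sample.
image = np.random.randint(0, 255, size=(480, 640, 3), dtype=np.uint8)
target = {
    "boxes":  np.array([[50., 60., 200., 220.]], dtype=np.float32),
    "labels": np.array([3], dtype=np.int64),
}

# Former usage (module removed by this commit):
# aug = RTDetrAugmentation(img_size=640)
# image_t, target_t, ratio = aug(image, target)
# ratio == [640 / 640, 640 / 480]   # [img_size / orig_w, img_size / orig_h]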

+ 4 - 9
dataset/data_augment/strong_augment.py

@@ -15,7 +15,6 @@ class MosaicAugment(object):
                  ) -> None:
         self.img_size = img_size
         self.is_train = is_train
-        self.keep_ratio    = transform_config['mosaic_keep_ratio']
         self.affine_params = transform_config['affine_params']
         self.mosaic_type   = transform_config['mosaic_type']
 
@@ -37,14 +36,10 @@ class MosaicAugment(object):
             orig_h, orig_w, _ = img_i.shape
 
             # resize
-            if self.keep_ratio:
-                r = self.img_size / max(orig_h, orig_w)
-                if r != 1: 
-                    interp = cv2.INTER_LINEAR if (self.is_train or r > 1) else cv2.INTER_AREA
-                    img_i = cv2.resize(img_i, (int(orig_w * r), int(orig_h * r)), interpolation=interp)
-            else:
-                interp = cv2.INTER_LINEAR if self.is_train else cv2.INTER_AREA
-                img_i = cv2.resize(img_i, (self.img_size, self.img_size), interpolation=interp)
+            r = self.img_size / max(orig_h, orig_w)
+            if r != 1: 
+                interp = cv2.INTER_LINEAR if (self.is_train or r > 1) else cv2.INTER_AREA
+                img_i = cv2.resize(img_i, (int(orig_w * r), int(orig_h * r)), interpolation=interp)
             h, w, _ = img_i.shape
 
             # place img in img4
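
The simplification above keeps only the aspect-preserving branch: each mosaic tile is scaled so its longer side equals img_size, and the shorter side follows. A quick numeric check of that rule (sizes only, no cv2 needed; returns (new_h, new_w)):

img_size = 640

def mosaic_tile_shape(orig_h, orig_w):
    # scale so max(h, w) == img_size while preserving aspect ratio
    r = img_size / max(orig_h, orig_w)
    return int(orig_h * r), int(orig_w * r)

print(mosaic_tile_shape(480, 320))   # (640, 426): long side pinned to 640
print(mosaic_tile_shape(640, 640))   # (640, 640): r == 1, resize is skipped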

+ 18 - 69
dataset/voc.py

@@ -70,7 +70,6 @@ class VOCDataset(data.Dataset):
                  trans_config = None,
                  transform    = None,
                  is_train     :bool = False,
-                 load_cache   :bool = False,
                  ):
         # ----------- Basic parameters -----------
         self.img_size = img_size
@@ -95,8 +94,8 @@ class VOCDataset(data.Dataset):
         if is_train:
             self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
             self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
-            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
-            self.mixup_augment  = MixupAugment(img_size, trans_config)
+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train) if self.mosaic_prob > 0. else None
+            self.mixup_augment  = MixupAugment(img_size, trans_config)            if self.mixup_prob > 0.  else None
         else:
             self.mosaic_prob = 0.0
             self.mixup_prob  = 0.0
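
Building MosaicAugment/MixupAugment only when their probability is non-zero means a disabled augmentation costs nothing at setup time; the sampling side can then guard on the same probability. A hedged sketch of that gating (the random.random() < prob check is an assumption about how the probability is consumed, not a quote from the dataset code):

import random

mosaic_prob = 0.0                                    # mosaic disabled
mosaic_augment = object() if mosaic_prob > 0. else None

def roll_mosaic():
    # never dereference an augment that was not built
    return mosaic_augment is not None and random.random() < mosaic_prob

print(roll_mosaic())   # always False while mosaic_prob == 0.0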
@@ -105,13 +104,6 @@ class VOCDataset(data.Dataset):
         print('==============================')
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
-        print('==============================')
-        # ----------- Cached data -----------
-        self.load_cache = load_cache
-        self.cached_datas = None
-        if self.load_cache:
-            self.cached_datas = self._load_cache()
-
 
     # ------------ Basic dataset function ------------
     def __getitem__(self, index):
@@ -121,38 +113,6 @@ class VOCDataset(data.Dataset):
     def __len__(self):
         return self.dataset_size
 
-    def _load_cache(self):
-        data_items = []
-        for idx in range(self.dataset_size):
-            if idx % 2000 == 0:
-                print("Caching images and targets : {} / {} ...".format(idx, self.dataset_size))
-
-            # load a data
-            image, target = self.load_image_target(idx)
-            orig_h, orig_w, _ = image.shape
-
-            # resize image
-            r = self.img_size / max(orig_h, orig_w)
-            if r != 1: 
-                interp = cv2.INTER_LINEAR
-                new_size = (int(orig_w * r), int(orig_h * r))
-                image = cv2.resize(image, new_size, interpolation=interp)
-            img_h, img_w = image.shape[:2]
-
-            # rescale bbox
-            boxes = target["boxes"].copy()
-            boxes[:, [0, 2]] = boxes[:, [0, 2]] / orig_w * img_w
-            boxes[:, [1, 3]] = boxes[:, [1, 3]] / orig_h * img_h
-            target["boxes"] = boxes
-
-            dict_item = {}
-            dict_item["image"] = image
-            dict_item["target"] = target
-
-            data_items.append(dict_item)
-        
-        return data_items
-
     # ------------ Mosaic & Mixup ------------
     def load_mosaic(self, index):
         # ------------ Prepare 4 indexes of images ------------
@@ -191,28 +151,20 @@ class VOCDataset(data.Dataset):
     
     # ------------ Load data function ------------
     def load_image_target(self, index):
-        # == Load a data from the cached data ==
-        if self.cached_datas is not None:
-            # load a data
-            data_item = self.cached_datas[index]
-            image = data_item["image"]
-            target = data_item["target"]
-        # == Load a data from the local disk ==
-        else:        
-            # load an image
-            image, _ = self.pull_image(index)
-            height, width, channels = image.shape
-
-            # load an annotation
-            anno, _ = self.pull_anno(index)
-
-            # guard against images with no boxes (reshape keeps shape (-1, 5))
-            anno = np.array(anno).reshape(-1, 5)
-            target = {
-                "boxes": anno[:, :4],
-                "labels": anno[:, 4],
-                "orig_size": [height, width]
-            }
+        # load an image
+        image, _ = self.pull_image(index)
+        height, width, channels = image.shape
+
+        # load an annotation
+        anno, _ = self.pull_anno(index)
+
+        # guard against images with no boxes (reshape keeps shape (-1, 5))
+        anno = np.array(anno).reshape(-1, 5)
+        target = {
+            "boxes": anno[:, :4],
+            "labels": anno[:, 4],
+            "orig_size": [height, width]
+        }
         
         return image, target
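
The reshape(-1, 5) is what keeps the no-annotation case safe: an image without boxes still produces an array with a fixed second dimension, so the slices above never raise. A short demonstration:

import numpy as np

anno_empty = np.array([]).reshape(-1, 5)                       # image with no objects
anno_one   = np.array([10., 20., 100., 120., 7.]).reshape(-1, 5)

print(anno_empty[:, :4].shape)   # (0, 4): valid slice, no IndexError
print(anno_one[:, 4])            # [7.]: class-id column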
 
@@ -262,7 +214,7 @@ if __name__ == "__main__":
     parser.add_argument('-size', '--img_size', default=640, type=int,
                         help='input image size.')
     parser.add_argument('--aug_type', type=str, default='ssd',
-                        help='augmentation type: ssd, yolov5, rtdetr.')
+                        help='augmentation type: ssd, yolo.')
     parser.add_argument('--mosaic', default=0., type=float,
                         help='mosaic augmentation.')
     parser.add_argument('--mixup', default=0., type=float,
@@ -271,8 +223,6 @@ if __name__ == "__main__":
                         help='mixup augmentation.')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='training mode.')
-    parser.add_argument('--load_cache', action="store_true", default=False,
-                        help='Path to the cached data.')
     
     args = parser.parse_args()
 
@@ -295,7 +245,7 @@ if __name__ == "__main__":
         # Mosaic & Mixup
         'mosaic_keep_ratio': False,
         'mosaic_prob': args.mosaic,
-        'mixup_prob': args.mixup,
+        'mixup_prob':  args.mixup,
         'mosaic_type': 'yolov5',
         'mixup_type':  'yolov5',
         'mixup_scale': [0.5, 1.5]
@@ -312,7 +262,6 @@ if __name__ == "__main__":
         trans_config=trans_config,
         transform=transform,
         is_train=args.is_train,
-        load_cache=args.load_cache
         )
     
     np.random.seed(0)

+ 5 - 9
dataset/widerface.py

@@ -51,8 +51,8 @@ class WiderFaceDataset(Dataset):
         if is_train:
             self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
             self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
-            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
-            self.mixup_augment  = MixupAugment(img_size, trans_config)
+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train) if self.mosaic_prob > 0. else None
+            self.mixup_augment  = MixupAugment(img_size, trans_config)            if self.mixup_prob > 0.  else None
         else:
             self.mosaic_prob = 0.0
             self.mixup_prob  = 0.0
@@ -61,7 +61,6 @@ class WiderFaceDataset(Dataset):
         print('==============================')
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
-        print('==============================')
 
     # ------------ Basic dataset function ------------
     def __len__(self):
@@ -203,15 +202,13 @@ if __name__ == "__main__":
                         help='mosaic augmentation.')
     parser.add_argument('--mixup', default=0., type=float,
                         help='mixup augmentation.')
-    parser.add_argument('--mixup_type', type=str, default='yolov5_mixup',
-                        help='mixup augmentation.')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='training mode.')
 
     args = parser.parse_args()
 
     trans_config = {
-        'aug_type': args.aug_type,    # optional: ssd, yolov5
+        'aug_type': args.aug_type,    # optional: ssd, yolo
         'pixel_mean': [0., 0., 0.],
         'pixel_std':  [255., 255., 255.],
         # Basic Augment
@@ -227,9 +224,8 @@ if __name__ == "__main__":
         # Mosaic & Mixup
         'mosaic_prob': args.mosaic,
         'mixup_prob': args.mixup,
-        'mosaic_type': 'yolov5_mosaic',
-        'mixup_type': args.mixup_type,   # optional: yolov5_mixup, yolox_mixup
-        'mosaic_keep_ratio': False,
+        'mosaic_type': 'yolov5',
+        'mixup_type':  'yolov5',   # optional: yolov5, yolox
         'mixup_scale': [0.5, 1.5]
     }
     transform, trans_cfg = build_transform(args, trans_config, 32, args.is_train)

+ 134 - 865
engine.py

@@ -16,17 +16,16 @@ from utils.vis_tools import vis_data
 from evaluator.build import build_evluator
 
 # ----------------- Optimizer & LrScheduler Components -----------------
-from utils.solver.optimizer import build_yolo_optimizer, build_rtdetr_optimizer
+from utils.solver.optimizer import build_optimizer
 from utils.solver.lr_scheduler import build_lambda_lr_scheduler
-from utils.solver.lr_scheduler import build_wp_lr_scheduler, build_lr_scheduler
 
 # ----------------- Dataset Components -----------------
 from dataset.build import build_dataset, build_transform
 
 
 # ----------------------- Det trainers -----------------------
-## YOLOv8 Trainer
-class Yolov8Trainer(object):
+## YOLOX Trainer
+class YoloxTrainer(object):
     def __init__(self, args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size):
         # ------------------- basic parameters -------------------
         self.args = args
@@ -35,9 +34,9 @@ class Yolov8Trainer(object):
         self.device = device
         self.criterion = criterion
         self.world_size = world_size
+        self.grad_accumulate = args.grad_accumulate
+        self.no_aug_epoch = args.no_aug_epoch
         self.heavy_eval = False
-        self.last_opt_step = 0
-        self.clip_grad = 10
         # weak augmentation stage
         self.second_stage = False
         self.third_stage = False
@@ -47,10 +46,10 @@ class Yolov8Trainer(object):
         self.path_to_save = os.path.join(args.save_folder, args.dataset, args.model)
         os.makedirs(self.path_to_save, exist_ok=True)
 
-        # ---------------------------- Hyperparameters following YOLOv8 ----------------------------
-        self.optimizer_dict = {'optimizer': 'sgd', 'momentum': 0.937, 'weight_decay': 5e-4, 'lr0': 0.01}
+        # ---------------------------- Hyperparameters following YOLOX ----------------------------
+        self.optimizer_dict = {'optimizer': 'sgd', 'momentum': 0.9, 'weight_decay': 5e-4, 'lr0': 0.01}
         self.ema_dict = {'ema_decay': 0.9999, 'ema_tau': 2000}
-        self.lr_schedule_dict = {'scheduler': 'linear', 'lrf': 0.01}
+        self.lr_schedule_dict = {'scheduler': 'cosine', 'lrf': 0.05}
         self.warmup_dict = {'warmup_momentum': 0.8, 'warmup_bias_lr': 0.1}        
 
         # ---------------------------- Build Dataset & Model & Trans. Config ----------------------------
@@ -60,9 +59,9 @@ class Yolov8Trainer(object):
 
         # ---------------------------- Build Transform ----------------------------
         self.train_transform, self.trans_cfg = build_transform(
-            args=args, trans_config=self.trans_cfg, max_stride=model_cfg['max_stride'], is_train=True)
+            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
         self.val_transform, _ = build_transform(
-            args=args, trans_config=self.trans_cfg, max_stride=model_cfg['max_stride'], is_train=False)
+            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=False)
 
         # ---------------------------- Build Dataset & Dataloader ----------------------------
         self.dataset, self.dataset_info = build_dataset(self.args, self.data_cfg, self.trans_cfg, self.train_transform, is_train=True)
@@ -75,13 +74,11 @@ class Yolov8Trainer(object):
         self.scaler = torch.cuda.amp.GradScaler(enabled=self.args.fp16)
 
         # ---------------------------- Build Optimizer ----------------------------
-        accumulate = max(1, round(64 / self.args.batch_size))
-        print('Grad Accumulate: {}'.format(accumulate))
-        self.optimizer_dict['weight_decay'] *= self.args.batch_size * accumulate / 64
-        self.optimizer, self.start_epoch = build_yolo_optimizer(self.optimizer_dict, model, self.args.resume)
+        self.optimizer_dict['lr0'] *= self.args.batch_size * self.grad_accumulate / 64
+        self.optimizer, self.start_epoch = build_optimizer(self.optimizer_dict, model, self.args.resume)
 
         # ---------------------------- Build LR Scheduler ----------------------------
-        self.lr_scheduler, self.lf = build_lambda_lr_scheduler(self.lr_schedule_dict, self.optimizer, self.args.max_epoch)
+        self.lr_scheduler, self.lf = build_lambda_lr_scheduler(self.lr_schedule_dict, self.optimizer, self.args.max_epoch - self.no_aug_epoch)
         self.lr_scheduler.last_epoch = self.start_epoch - 1  # do not move
         if self.args.resume and self.args.resume != 'None':
             self.lr_scheduler.step()
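
Scaling lr0 instead of the weight decay follows the linear-scaling rule: lr0 is defined for an effective batch of 64, and batch_size * grad_accumulate plays the role of that effective batch. Worked values:

base_lr0 = 0.01   # defined for an effective batch size of 64

def scaled_lr0(batch_size, grad_accumulate):
    return base_lr0 * batch_size * grad_accumulate / 64

print(scaled_lr0(16, 4))   # 0.01:    16 x 4 == 64, unchanged
print(scaled_lr0(8, 1))    # 0.00125: small effective batch, lr reduced 8x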
@@ -93,6 +90,7 @@ class Yolov8Trainer(object):
         else:
             self.model_ema = None
 
+
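
ema_decay and ema_tau parameterize the weight EMA. A sketch of the usual YOLOv5-style decay ramp these two values suggest; the exact formula inside ModelEMA is an assumption here, not quoted from this repo:

import math

ema_decay, ema_tau = 0.9999, 2000

def effective_decay(num_updates):
    # assumed ramp: decay rises from ~0 toward ema_decay, so early noisy
    # weights are averaged in aggressively and late weights barely move the EMA
    return ema_decay * (1.0 - math.exp(-num_updates / ema_tau))

for n in (100, 2000, 20000):
    print(n, round(effective_decay(n), 5))   # ~0.04877, ~0.63206, ~0.99985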
     def train(self, model):
         for epoch in range(self.start_epoch, self.args.max_epoch):
             if self.args.distributed:
@@ -125,7 +123,7 @@ class Yolov8Trainer(object):
                             'epoch': self.epoch,
                             'args': self.args}, 
                             checkpoint_path)
-
+                
             # train one epoch
             self.epoch = epoch
             self.train_one_epoch(model)
@@ -200,15 +198,13 @@ class Yolov8Trainer(object):
         img_size = self.args.img_size
         t0 = time.time()
         nw = epoch_size * self.args.wp_epoch
-        accumulate = accumulate = max(1, round(64 / self.args.batch_size))
 
-        # train one epoch
+        # Train one epoch
         for iter_i, (images, targets) in enumerate(self.train_loader):
             ni = iter_i + self.epoch * epoch_size
             # Warmup
             if ni <= nw:
                 xi = [0, nw]  # x interp
-                accumulate = max(1, np.interp(ni, xi, [1, 64 / self.args.batch_size]).round())
                 for j, x in enumerate(self.optimizer.param_groups):
                     # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                     x['lr'] = np.interp(
@@ -216,54 +212,45 @@ class Yolov8Trainer(object):
                     if 'momentum' in x:
                         x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
                                 
-            # to device
+            # To device
             images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
-            if self.args.multi_scale:
+            if self.args.multi_scale and ni % 10 == 0:
                 images, targets, img_size = self.rescale_image_targets(
                     images, targets, self.model_cfg['stride'], self.args.min_box_size, self.model_cfg['multi_scale'])
             else:
                 targets = self.refine_targets(targets, self.args.min_box_size)
                 
-            # visualize train targets
+            # Visualize train targets
             if self.args.vis_tgt:
                 vis_data(images*255, targets)
 
-            # inference
+            # Inference
             with torch.cuda.amp.autocast(enabled=self.args.fp16):
                 outputs = model(images)
-                # loss
+                # Compute loss
                 loss_dict = self.criterion(outputs=outputs, targets=targets, epoch=self.epoch)
                 losses = loss_dict['losses']
-                losses *= images.shape[0]  # loss * bs
+                # Grad Accu
+                if self.grad_accumulate > 1: 
+                    losses /= self.grad_accumulate
 
-                # reduce            
                 loss_dict_reduced = distributed_utils.reduce_dict(loss_dict)
 
-                # gradient averaged between devices in DDP mode
-                losses *= distributed_utils.get_world_size()
-
-            # backward
+            # Backward
             self.scaler.scale(losses).backward()
 
             # Optimize
-            if ni - self.last_opt_step >= accumulate:
-                if self.clip_grad > 0:
-                    # unscale gradients
-                    self.scaler.unscale_(self.optimizer)
-                    # clip gradients
-                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=self.clip_grad)
-                # optimizer.step
+            if ni % self.grad_accumulate == 0:
                 self.scaler.step(self.optimizer)
                 self.scaler.update()
                 self.optimizer.zero_grad()
                 # ema
                 if self.model_ema is not None:
                     self.model_ema.update(model)
-                self.last_opt_step = ni
 
-            # display
+            # Logs
             if distributed_utils.is_main_process() and iter_i % 10 == 0:
                 t1 = time.time()
                 cur_lr = [param_group['lr']  for param_group in self.optimizer.param_groups]
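
Two pieces of arithmetic in this loop are easy to misread. During warmup, np.interp moves each param group's lr from its warmup start toward the scheduled value: the bias group starts high at 0.1 and falls, the others rise from 0. Under accumulation, the loss is pre-divided so the gradients summed over grad_accumulate backward passes match one large-batch step. A condensed, self-contained sketch of both, with the scheduled lr simplified to a constant lr0 and illustrative sizes:

import numpy as np

nw = 3 * 500                        # warmup iters = wp_epoch * epoch_size (illustrative)
warmup_bias_lr, lr0 = 0.1, 0.01
grad_accumulate = 4

for ni in (0, nw // 2, nw):
    bias_lr  = float(np.interp(ni, [0, nw], [warmup_bias_lr, lr0]))  # falls 0.1 -> 0.01
    other_lr = float(np.interp(ni, [0, nw], [0.0, lr0]))             # rises 0.0 -> 0.01
    print(ni, round(bias_lr, 4), round(other_lr, 4))

loss = 2.0 / grad_accumulate        # pre-divide: 4 accumulated backwards == 1 step
print(loss)                         # 0.5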
@@ -273,7 +260,10 @@ class Yolov8Trainer(object):
                 log += '[lr: {:.6f}]'.format(cur_lr[2])
                 # loss infor
                 for k in loss_dict_reduced.keys():
-                    log += '[{}: {:.2f}]'.format(k, loss_dict_reduced[k])
+                    loss_val = loss_dict_reduced[k]
+                    if k == 'losses':
+                        loss_val *= self.grad_accumulate
+                    log += '[{}: {:.2f}]'.format(k, loss_val)
 
                 # other infor
                 log += '[time: {:.2f}]'.format(t1 - t0)
@@ -283,12 +273,14 @@ class Yolov8Trainer(object):
                 print(log, flush=True)
                 
                 t0 = time.time()
-        
+
             if self.args.debug:
                 print("For debug mode, we only train 1 iteration")
                 break
 
-        self.lr_scheduler.step()
+        # LR Schedule
+        if not self.second_stage:
+            self.lr_scheduler.step()
         
     def check_second_stage(self):
         # set second stage
@@ -369,8 +361,13 @@ class Yolov8Trainer(object):
 
         # During training phase, the shape of input image is square.
         old_img_size = images.shape[-1]
-        new_img_size = random.randrange(old_img_size * multi_scale_range[0], old_img_size * multi_scale_range[1] + max_stride)
+        min_img_size = old_img_size * multi_scale_range[0]
+        max_img_size = old_img_size * multi_scale_range[1]
+
+        # Choose a new image size
+        new_img_size = random.randrange(min_img_size, max_img_size + max_stride, max_stride)
         new_img_size = new_img_size // max_stride * max_stride  # size
+        
         if new_img_size / old_img_size != 1:
             # interpolate
             images = torch.nn.functional.interpolate(
@@ -396,8 +393,8 @@ class Yolov8Trainer(object):
 
         return images, targets, new_img_size
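
With the step argument, randrange now samples only multiples of max_stride inside the scale window, and the floor-divide is just a final guard. Candidate sizes for the typical setup (the [0.5, 1.5] range is an assumed model-config value; int() casts are added here because randrange requires integers):

import random

old_img_size, max_stride = 640, 32
multi_scale_range = [0.5, 1.5]      # assumed value from the model config

min_img_size = int(old_img_size * multi_scale_range[0])   # 320
max_img_size = int(old_img_size * multi_scale_range[1])   # 960

new_img_size = random.randrange(min_img_size, max_img_size + max_stride, max_stride)
new_img_size = new_img_size // max_stride * max_stride    # guard: keep a stride multiple
print(new_img_size)    # one of 320, 352, ..., 960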
 
-## YOLOX Trainer
-class YoloxTrainer(object):
+## Real-time Convolutional Object Detector Trainer
+class RTCTrainer(object):
     def __init__(self, args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size):
         # ------------------- basic parameters -------------------
         self.args = args
@@ -407,7 +404,7 @@ class YoloxTrainer(object):
         self.criterion = criterion
         self.world_size = world_size
         self.grad_accumulate = args.grad_accumulate
-        self.no_aug_epoch = args.no_aug_epoch
+        self.clip_grad = 35
         self.heavy_eval = False
         # weak augmentation stage
         self.second_stage = False
@@ -418,39 +415,39 @@ class YoloxTrainer(object):
         self.path_to_save = os.path.join(args.save_folder, args.dataset, args.model)
         os.makedirs(self.path_to_save, exist_ok=True)
 
-        # ---------------------------- Hyperparameters following YOLOX ----------------------------
-        self.optimizer_dict = {'optimizer': 'sgd', 'momentum': 0.9, 'weight_decay': 5e-4, 'lr0': 0.01}
-        self.ema_dict = {'ema_decay': 0.9999, 'ema_tau': 2000}
-        self.lr_schedule_dict = {'scheduler': 'cosine', 'lrf': 0.05}
+        # ---------------------------- Hyperparameters following RTMDet ----------------------------
+        self.optimizer_dict = {'optimizer': 'adamw', 'momentum': None, 'weight_decay': 5e-2, 'lr0': 0.001}
+        self.ema_dict = {'ema_decay': 0.9998, 'ema_tau': 2000}
+        self.lr_schedule_dict = {'scheduler': 'linear', 'lrf': 0.01}
         self.warmup_dict = {'warmup_momentum': 0.8, 'warmup_bias_lr': 0.1}        
 
         # ---------------------------- Build Dataset & Model & Trans. Config ----------------------------
-        self.data_cfg = data_cfg
+        self.data_cfg  = data_cfg
         self.model_cfg = model_cfg
         self.trans_cfg = trans_cfg
 
         # ---------------------------- Build Transform ----------------------------
         self.train_transform, self.trans_cfg = build_transform(
-            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
+            args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
         self.val_transform, _ = build_transform(
-            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=False)
+            args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=False)
 
         # ---------------------------- Build Dataset & Dataloader ----------------------------
-        self.dataset, self.dataset_info = build_dataset(self.args, self.data_cfg, self.trans_cfg, self.train_transform, is_train=True)
-        self.train_loader = build_dataloader(self.args, self.dataset, self.args.batch_size // self.world_size, CollateFunc())
+        self.dataset, self.dataset_info = build_dataset(args, self.data_cfg, self.trans_cfg, self.train_transform, is_train=True)
+        self.train_loader = build_dataloader(args, self.dataset, self.args.batch_size // self.world_size, CollateFunc())
 
         # ---------------------------- Build Evaluator ----------------------------
-        self.evaluator = build_evluator(self.args, self.data_cfg, self.val_transform, self.device)
+        self.evaluator = build_evluator(args, self.data_cfg, self.val_transform, self.device)
 
         # ---------------------------- Build Grad. Scaler ----------------------------
         self.scaler = torch.cuda.amp.GradScaler(enabled=self.args.fp16)
 
         # ---------------------------- Build Optimizer ----------------------------
-        self.optimizer_dict['lr0'] *= self.args.batch_size * self.grad_accumulate / 64
-        self.optimizer, self.start_epoch = build_yolo_optimizer(self.optimizer_dict, model, self.args.resume)
+        self.optimizer_dict['lr0'] *= args.batch_size * self.grad_accumulate / 64
+        self.optimizer, self.start_epoch = build_optimizer(self.optimizer_dict, model, args.resume)
 
         # ---------------------------- Build LR Scheduler ----------------------------
-        self.lr_scheduler, self.lf = build_lambda_lr_scheduler(self.lr_schedule_dict, self.optimizer, self.args.max_epoch - self.no_aug_epoch)
+        self.lr_scheduler, self.lf = build_lambda_lr_scheduler(self.lr_schedule_dict, self.optimizer, args.max_epoch)
         self.lr_scheduler.last_epoch = self.start_epoch - 1  # do not move
         if self.args.resume and self.args.resume != 'None':
             self.lr_scheduler.step()
@@ -462,7 +459,6 @@ class YoloxTrainer(object):
         else:
             self.model_ema = None
 
-
     def train(self, model):
         for epoch in range(self.start_epoch, self.args.max_epoch):
             if self.args.distributed:
@@ -495,7 +491,7 @@ class YoloxTrainer(object):
                             'epoch': self.epoch,
                             'args': self.args}, 
                             checkpoint_path)
-                
+
             # train one epoch
             self.epoch = epoch
             self.train_one_epoch(model)
@@ -565,14 +561,21 @@ class YoloxTrainer(object):
             dist.barrier()
 
     def train_one_epoch(self, model):
+        metric_logger = MetricLogger(delimiter="  ")
+        metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
+        metric_logger.add_meter('size', SmoothedValue(window_size=1, fmt='{value:d}'))
+        metric_logger.add_meter('grad_norm', SmoothedValue(window_size=1, fmt='{value:.1f}'))
+        header = 'Epoch: [{} / {}]'.format(self.epoch, self.args.max_epoch)
+        epoch_size = len(self.train_loader)
+        print_freq = 10
+
         # basic parameters
         epoch_size = len(self.train_loader)
         img_size = self.args.img_size
-        t0 = time.time()
         nw = epoch_size * self.args.wp_epoch
 
         # Train one epoch
-        for iter_i, (images, targets) in enumerate(self.train_loader):
+        for iter_i, (images, targets) in enumerate(metric_logger.log_every(self.train_loader, print_freq, header)):
             ni = iter_i + self.epoch * epoch_size
             # Warmup
             if ni <= nw:
@@ -588,7 +591,7 @@ class YoloxTrainer(object):
             images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
-            if self.args.multi_scale and ni % 10 == 0:
+            if self.args.multi_scale:
                 images, targets, img_size = self.rescale_image_targets(
                     images, targets, self.model_cfg['stride'], self.args.min_box_size, self.model_cfg['multi_scale'])
             else:
@@ -604,8 +607,8 @@ class YoloxTrainer(object):
                 # Compute loss
                 loss_dict = self.criterion(outputs=outputs, targets=targets, epoch=self.epoch)
                 losses = loss_dict['losses']
-                # Grad Accu
-                if self.grad_accumulate > 1: 
+                # Grad Accumulate
+                if self.grad_accumulate > 1:
                     losses /= self.grad_accumulate
 
                 loss_dict_reduced = distributed_utils.reduce_dict(loss_dict)
@@ -615,6 +618,13 @@ class YoloxTrainer(object):
 
             # Optimize
             if ni % self.grad_accumulate == 0:
+                grad_norm = None
+                if self.clip_grad > 0:
+                    # unscale gradients
+                    self.scaler.unscale_(self.optimizer)
+                    # clip gradients
+                    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=self.clip_grad)
+                # optimizer.step
                 self.scaler.step(self.optimizer)
                 self.scaler.update()
                 self.optimizer.zero_grad()
@@ -622,29 +632,11 @@ class YoloxTrainer(object):
                 if self.model_ema is not None:
                     self.model_ema.update(model)
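
The ordering in this optimize step matters: gradients must be unscaled before clip_grad_norm_, otherwise the norm is measured on loss-scaled gradients and the threshold of 35 is meaningless. A minimal standalone sketch of the same sequence (enabled=False so it also runs on CPU):

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scaler = torch.cuda.amp.GradScaler(enabled=False)

loss = model(torch.randn(8, 4)).sum()
scaler.scale(loss).backward()

scaler.unscale_(optimizer)                       # 1) remove the loss scale
grad_norm = torch.nn.utils.clip_grad_norm_(      # 2) clip true gradients
    model.parameters(), max_norm=35)
scaler.step(optimizer)                           # 3) step (skipped on inf/nan)
scaler.update()
optimizer.zero_grad()
print(float(grad_norm))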
 
-            # Logs
-            if distributed_utils.is_main_process() and iter_i % 10 == 0:
-                t1 = time.time()
-                cur_lr = [param_group['lr']  for param_group in self.optimizer.param_groups]
-                # basic infor
-                log =  '[Epoch: {}/{}]'.format(self.epoch, self.args.max_epoch)
-                log += '[Iter: {}/{}]'.format(iter_i, epoch_size)
-                log += '[lr: {:.6f}]'.format(cur_lr[2])
-                # loss infor
-                for k in loss_dict_reduced.keys():
-                    loss_val = loss_dict_reduced[k]
-                    if k == 'losses':
-                        loss_val *= self.grad_accumulate
-                    log += '[{}: {:.2f}]'.format(k, loss_val)
-
-                # other infor
-                log += '[time: {:.2f}]'.format(t1 - t0)
-                log += '[size: {}]'.format(img_size)
-
-                # print log infor
-                print(log, flush=True)
-                
-                t0 = time.time()
+            # Update log
+            metric_logger.update(**loss_dict_reduced)
+            metric_logger.update(lr=self.optimizer.param_groups[2]["lr"])
+            metric_logger.update(grad_norm=grad_norm)
+            metric_logger.update(size=img_size)
 
             if self.args.debug:
                 print("For debug mode, we only train 1 iteration")
@@ -653,60 +645,11 @@ class YoloxTrainer(object):
         # LR Schedule
         if not self.second_stage:
             self.lr_scheduler.step()
-        
-    def check_second_stage(self):
-        # set second stage
-        print('============== Second stage of Training ==============')
-        self.second_stage = True
-
-        # close mosaic augmentation
-        if self.train_loader.dataset.mosaic_prob > 0.:
-            print(' - Close < Mosaic Augmentation > ...')
-            self.train_loader.dataset.mosaic_prob = 0.
-            self.heavy_eval = True
-
-        # close mixup augmentation
-        if self.train_loader.dataset.mixup_prob > 0.:
-            print(' - Close < Mixup Augmentation > ...')
-            self.train_loader.dataset.mixup_prob = 0.
-            self.heavy_eval = True
-
-        # close rotation augmentation
-        if 'degrees' in self.trans_cfg.keys() and self.trans_cfg['degrees'] > 0.0:
-            print(' - Close < degrees of rotation > ...')
-            self.trans_cfg['degrees'] = 0.0
-        if 'shear' in self.trans_cfg.keys() and self.trans_cfg['shear'] > 0.0:
-            print(' - Close < shear of rotation > ...')
-            self.trans_cfg['shear'] = 0.0
-        if 'perspective' in self.trans_cfg.keys() and self.trans_cfg['perspective'] > 0.0:
-            print(' - Close < perspective of rotation > ...')
-            self.trans_cfg['perspective'] = 0.0
-
-        # build a new transform for second stage
-        print(' - Rebuild transforms ...')
-        self.train_transform, self.trans_cfg = build_transform(
-            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        self.train_loader.dataset.transform = self.train_transform
-        
-    def check_third_stage(self):
-        # set third stage
-        print('============== Third stage of Training ==============')
-        self.third_stage = True
 
-        # close random affine
-        if 'translate' in self.trans_cfg.keys() and self.trans_cfg['translate'] > 0.0:
-            print(' - Close < translate of affine > ...')
-            self.trans_cfg['translate'] = 0.0
-        if 'scale' in self.trans_cfg.keys():
-            print(' - Close < scale of affine >...')
-            self.trans_cfg['scale'] = [1.0, 1.0]
+        # Gather the stats from all processes
+        metric_logger.synchronize_between_processes()
+        print("Averaged stats:", metric_logger)
 
-        # build a new transform for second stage
-        print(' - Rebuild transforms ...')
-        self.train_transform, self.trans_cfg = build_transform(
-            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        self.train_loader.dataset.transform = self.train_transform
-        
     def refine_targets(self, targets, min_box_size):
         # rescale targets
         for tgt in targets:
@@ -733,8 +676,13 @@ class YoloxTrainer(object):
 
         # During training phase, the shape of input image is square.
         old_img_size = images.shape[-1]
-        new_img_size = random.randrange(old_img_size * multi_scale_range[0], old_img_size * multi_scale_range[1] + max_stride)
+        min_img_size = old_img_size * multi_scale_range[0]
+        max_img_size = old_img_size * multi_scale_range[1]
+
+        # Choose a new image size
+        new_img_size = random.randrange(min_img_size, max_img_size + max_stride, max_stride)
         new_img_size = new_img_size // max_stride * max_stride  # size
+
         if new_img_size / old_img_size != 1:
             # interpolate
             images = torch.nn.functional.interpolate(
@@ -760,746 +708,67 @@ class YoloxTrainer(object):
 
         return images, targets, new_img_size
 
-## Real-time Convolutional Object Detector Trainer
-class RTCTrainer(object):
-    def __init__(self, args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size):
-        # ------------------- basic parameters -------------------
-        self.args = args
-        self.epoch = 0
-        self.best_map = -1.
-        self.device = device
-        self.criterion = criterion
-        self.world_size = world_size
-        self.grad_accumulate = args.grad_accumulate
-        self.clip_grad = 35
-        self.heavy_eval = False
-        # weak augmentation stage
-        self.second_stage = False
-        self.third_stage = False
-        self.second_stage_epoch = args.no_aug_epoch
-        self.third_stage_epoch = args.no_aug_epoch // 2
-        # path to save model
-        self.path_to_save = os.path.join(args.save_folder, args.dataset, args.model)
-        os.makedirs(self.path_to_save, exist_ok=True)
+    def check_second_stage(self):
+        # set second stage
+        print('============== Second stage of Training ==============')
+        self.second_stage = True
 
-        # ---------------------------- Hyperparameters following RTMDet ----------------------------
-        self.optimizer_dict = {'optimizer': 'adamw', 'momentum': None, 'weight_decay': 5e-2, 'lr0': 0.001}
-        self.ema_dict = {'ema_decay': 0.9998, 'ema_tau': 2000}
-        self.lr_schedule_dict = {'scheduler': 'linear', 'lrf': 0.01}
-        self.warmup_dict = {'warmup_momentum': 0.8, 'warmup_bias_lr': 0.1}        
+        # close mosaic augmentation
+        if self.train_loader.dataset.mosaic_prob > 0.:
+            print(' - Close < Mosaic Augmentation > ...')
+            self.train_loader.dataset.mosaic_prob = 0.
+            self.heavy_eval = True
 
-        # ---------------------------- Build Dataset & Model & Trans. Config ----------------------------
-        self.data_cfg  = data_cfg
-        self.model_cfg = model_cfg
-        self.trans_cfg = trans_cfg
+        # close mixup augmentation
+        if self.train_loader.dataset.mixup_prob > 0.:
+            print(' - Close < Mixup Augmentation > ...')
+            self.train_loader.dataset.mixup_prob = 0.
+            self.heavy_eval = True
 
-        # ---------------------------- Build Transform ----------------------------
+        # close rotation augmentation
+        if 'degrees' in self.trans_cfg.keys() and self.trans_cfg['degrees'] > 0.0:
+            print(' - Close < degrees of rotation > ...')
+            self.trans_cfg['degrees'] = 0.0
+        if 'shear' in self.trans_cfg.keys() and self.trans_cfg['shear'] > 0.0:
+            print(' - Close < shear of rotation > ...')
+            self.trans_cfg['shear'] = 0.0
+        if 'perspective' in self.trans_cfg.keys() and self.trans_cfg['perspective'] > 0.0:
+            print(' - Close < perspective of rotation > ...')
+            self.trans_cfg['perspective'] = 0.0
+
+        # build a new transform for second stage
+        print(' - Rebuild transforms ...')
         self.train_transform, self.trans_cfg = build_transform(
-            args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        self.val_transform, _ = build_transform(
-            args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=False)
+            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
+        self.train_loader.dataset.transform = self.train_transform
+        
+    def check_third_stage(self):
+        # set third stage
+        print('============== Third stage of Training ==============')
+        self.third_stage = True
 
-        # ---------------------------- Build Dataset & Dataloader ----------------------------
-        self.dataset, self.dataset_info = build_dataset(args, self.data_cfg, self.trans_cfg, self.train_transform, is_train=True)
-        self.train_loader = build_dataloader(args, self.dataset, self.args.batch_size // self.world_size, CollateFunc())
-
-        # ---------------------------- Build Evaluator ----------------------------
-        self.evaluator = build_evluator(args, self.data_cfg, self.val_transform, self.device)
-
-        # ---------------------------- Build Grad. Scaler ----------------------------
-        self.scaler = torch.cuda.amp.GradScaler(enabled=self.args.fp16)
-
-        # ---------------------------- Build Optimizer ----------------------------
-        self.optimizer_dict['lr0'] *= args.batch_size * self.grad_accumulate / 64
-        self.optimizer, self.start_epoch = build_yolo_optimizer(self.optimizer_dict, model, args.resume)
-
-        # ---------------------------- Build LR Scheduler ----------------------------
-        self.lr_scheduler, self.lf = build_lambda_lr_scheduler(self.lr_schedule_dict, self.optimizer, args.max_epoch)
-        self.lr_scheduler.last_epoch = self.start_epoch - 1  # do not move
-        if self.args.resume and self.args.resume != 'None':
-            self.lr_scheduler.step()
-
-        # ---------------------------- Build Model-EMA ----------------------------
-        if self.args.ema and distributed_utils.get_rank() in [-1, 0]:
-            print('Build ModelEMA ...')
-            self.model_ema = ModelEMA(self.ema_dict, model, self.start_epoch * len(self.train_loader))
-        else:
-            self.model_ema = None
-
-    def train(self, model):
-        for epoch in range(self.start_epoch, self.args.max_epoch):
-            if self.args.distributed:
-                self.train_loader.batch_sampler.sampler.set_epoch(epoch)
-
-            # check second stage
-            if epoch >= (self.args.max_epoch - self.second_stage_epoch - 1) and not self.second_stage:
-                self.check_second_stage()
-                # save model of the last mosaic epoch
-                weight_name = '{}_last_mosaic_epoch.pth'.format(self.args.model)
-                checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                print('Saving state of the last Mosaic epoch-{}.'.format(self.epoch))
-                torch.save({'model': model.state_dict(),
-                            'mAP': round(self.evaluator.map*100, 1),
-                            'optimizer': self.optimizer.state_dict(),
-                            'epoch': self.epoch,
-                            'args': self.args}, 
-                            checkpoint_path)
-
-            # check third stage
-            if epoch >= (self.args.max_epoch - self.third_stage_epoch - 1) and not self.third_stage:
-                self.check_third_stage()
-                # save model of the last weak-augment epoch
-                weight_name = '{}_last_weak_augment_epoch.pth'.format(self.args.model)
-                checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                print('Saving state of the last weak augment epoch-{}.'.format(self.epoch))
-                torch.save({'model': model.state_dict(),
-                            'mAP': round(self.evaluator.map*100, 1),
-                            'optimizer': self.optimizer.state_dict(),
-                            'epoch': self.epoch,
-                            'args': self.args}, 
-                            checkpoint_path)
-
-            # train one epoch
-            self.epoch = epoch
-            self.train_one_epoch(model)
-
-            # eval one epoch
-            if self.heavy_eval:
-                model_eval = model.module if self.args.distributed else model
-                self.eval(model_eval)
-            else:
-                model_eval = model.module if self.args.distributed else model
-                if (epoch % self.args.eval_epoch) == 0 or (epoch == self.args.max_epoch - 1):
-                    self.eval(model_eval)
-
-            if self.args.debug:
-                print("For debug mode, we only train 1 epoch")
-                break
-
-    def eval(self, model):
-        # check model
-        model_eval = model if self.model_ema is None else self.model_ema.ema
-
-        if distributed_utils.is_main_process():
-            # check evaluator
-            if self.evaluator is None:
-                print('No evaluator ... save model and go on training.')
-                print('Saving state, epoch: {}'.format(self.epoch))
-                weight_name = '{}_no_eval.pth'.format(self.args.model)
-                checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                torch.save({'model': model_eval.state_dict(),
-                            'mAP': -1.,
-                            'optimizer': self.optimizer.state_dict(),
-                            'epoch': self.epoch,
-                            'args': self.args}, 
-                            checkpoint_path)               
-            else:
-                print('eval ...')
-                # set eval mode
-                model_eval.trainable = False
-                model_eval.eval()
-
-                # evaluate
-                with torch.no_grad():
-                    self.evaluator.evaluate(model_eval)
-
-                # save model
-                cur_map = self.evaluator.map
-                if cur_map > self.best_map:
-                    # update best-map
-                    self.best_map = cur_map
-                    # save model
-                    print('Saving state, epoch:', self.epoch)
-                    weight_name = '{}_best.pth'.format(self.args.model)
-                    checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                    torch.save({'model': model_eval.state_dict(),
-                                'mAP': round(self.best_map*100, 1),
-                                'optimizer': self.optimizer.state_dict(),
-                                'epoch': self.epoch,
-                                'args': self.args}, 
-                                checkpoint_path)                      
-
-                # set train mode.
-                model_eval.trainable = True
-                model_eval.train()
-
-        if self.args.distributed:
-            # wait for all processes to synchronize
-            dist.barrier()
-
-    def train_one_epoch(self, model):
-        metric_logger = MetricLogger(delimiter="  ")
-        metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
-        metric_logger.add_meter('size', SmoothedValue(window_size=1, fmt='{value:d}'))
-        metric_logger.add_meter('grad_norm', SmoothedValue(window_size=1, fmt='{value:.1f}'))
-        header = 'Epoch: [{} / {}]'.format(self.epoch, self.args.max_epoch)
-        epoch_size = len(self.train_loader)
-        print_freq = 10
-
-        # basic parameters
-        img_size = self.args.img_size
-        nw = epoch_size * self.args.wp_epoch
-
-        # Train one epoch
-        for iter_i, (images, targets) in enumerate(metric_logger.log_every(self.train_loader, print_freq, header)):
-            ni = iter_i + self.epoch * epoch_size
-            # Warmup
-            if ni <= nw:
-                xi = [0, nw]  # x interp
-                for j, x in enumerate(self.optimizer.param_groups):
-                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
-                    x['lr'] = np.interp(
-                        ni, xi, [self.warmup_dict['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * self.lf(self.epoch)])
-                    if 'momentum' in x:
-                        x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
-                                
-            # To device
-            images = images.to(self.device, non_blocking=True).float()
-
-            # Multi scale
-            if self.args.multi_scale:
-                images, targets, img_size = self.rescale_image_targets(
-                    images, targets, self.model_cfg['stride'], self.args.min_box_size, self.model_cfg['multi_scale'])
-            else:
-                targets = self.refine_targets(targets, self.args.min_box_size)
-                
-            # Visualize train targets
-            if self.args.vis_tgt:
-                vis_data(images*255, targets)
-
-            # Inference
-            with torch.cuda.amp.autocast(enabled=self.args.fp16):
-                outputs = model(images)
-                # Compute loss
-                loss_dict = self.criterion(outputs=outputs, targets=targets, epoch=self.epoch)
-                losses = loss_dict['losses']
-                # Grad Accumulate
-                if self.grad_accumulate > 1:
-                    losses /= self.grad_accumulate
-
-                loss_dict_reduced = distributed_utils.reduce_dict(loss_dict)
-
-            # Backward
-            self.scaler.scale(losses).backward()
-
-            # Optimize
-            if ni % self.grad_accumulate == 0:
-                grad_norm = None
-                if self.clip_grad > 0:
-                    # unscale gradients
-                    self.scaler.unscale_(self.optimizer)
-                    # clip gradients
-                    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=self.clip_grad)
-                # optimizer.step
-                self.scaler.step(self.optimizer)
-                self.scaler.update()
-                self.optimizer.zero_grad()
-                # ema
-                if self.model_ema is not None:
-                    self.model_ema.update(model)
-
-            # Update log
-            metric_logger.update(**loss_dict_reduced)
-            metric_logger.update(lr=self.optimizer.param_groups[2]["lr"])
-            metric_logger.update(grad_norm=grad_norm)
-            metric_logger.update(size=img_size)
-
-            if self.args.debug:
-                print("For debug mode, we only train 1 iteration")
-                break
-
-        # LR Schedule
-        if not self.second_stage:
-            self.lr_scheduler.step()
-
-        # Gather the stats from all processes
-        metric_logger.synchronize_between_processes()
-        print("Averaged stats:", metric_logger)
-
-    def refine_targets(self, targets, min_box_size):
-        # filter out targets smaller than min_box_size
-        for tgt in targets:
-            boxes = tgt["boxes"].clone()
-            labels = tgt["labels"].clone()
-            # refine tgt
-            tgt_boxes_wh = boxes[..., 2:] - boxes[..., :2]
-            min_tgt_size = torch.min(tgt_boxes_wh, dim=-1)[0]
-            keep = (min_tgt_size >= min_box_size)
-
-            tgt["boxes"] = boxes[keep]
-            tgt["labels"] = labels[keep]
-        
-        return targets
-
-    def rescale_image_targets(self, images, targets, stride, min_box_size, multi_scale_range=[0.5, 1.5]):
-        """
-            Deployed for Multi scale trick.
-        """
-        if isinstance(stride, int):
-            max_stride = stride
-        elif isinstance(stride, list):
-            max_stride = max(stride)
-
-        # During training phase, the shape of input image is square.
-        old_img_size = images.shape[-1]
-        # randrange needs integer bounds; the scale-range endpoints are floats
-        new_img_size = random.randrange(int(old_img_size * multi_scale_range[0]), int(old_img_size * multi_scale_range[1]) + max_stride)
-        new_img_size = new_img_size // max_stride * max_stride  # snap to a multiple of max stride
-        if new_img_size / old_img_size != 1:
-            # interpolate
-            images = torch.nn.functional.interpolate(
-                                input=images, 
-                                size=new_img_size, 
-                                mode='bilinear', 
-                                align_corners=False)
-        # rescale targets
-        for tgt in targets:
-            boxes = tgt["boxes"].clone()
-            labels = tgt["labels"].clone()
-            boxes = torch.clamp(boxes, 0, old_img_size)
-            # rescale box
-            boxes[:, [0, 2]] = boxes[:, [0, 2]] / old_img_size * new_img_size
-            boxes[:, [1, 3]] = boxes[:, [1, 3]] / old_img_size * new_img_size
-            # refine tgt
-            tgt_boxes_wh = boxes[..., 2:] - boxes[..., :2]
-            min_tgt_size = torch.min(tgt_boxes_wh, dim=-1)[0]
-            keep = (min_tgt_size >= min_box_size)
-
-            tgt["boxes"] = boxes[keep]
-            tgt["labels"] = labels[keep]
-
-        return images, targets, new_img_size
-
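
Aside for readers skimming the diff: `rescale_image_targets` is the classic multi-scale training trick — draw a random square size, snap it to a multiple of the maximum stride, bilinearly resize the batch, and scale the boxes by the same ratio. A minimal self-contained sketch of that logic (the 640 input size and stride 32 are illustrative assumptions, not values read from this repo):

import random
import torch
import torch.nn.functional as F

def random_rescale(images, boxes, max_stride=32, scale_range=(0.5, 1.5)):
    # Square inputs are assumed, as in the trainer above.
    old_size = images.shape[-1]
    new_size = random.randrange(int(old_size * scale_range[0]),
                                int(old_size * scale_range[1]) + max_stride)
    new_size = new_size // max_stride * max_stride  # snap to a stride multiple
    if new_size != old_size:
        images = F.interpolate(images, size=new_size, mode='bilinear',
                               align_corners=False)
        boxes = boxes * (new_size / old_size)  # xyxy boxes scale linearly
    return images, boxes, new_size

imgs, boxes, size = random_rescale(torch.zeros(2, 3, 640, 640),
                                   torch.tensor([[10., 20., 110., 220.]]))
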
-    def check_second_stage(self):
-        # set second stage
-        print('============== Second stage of Training ==============')
-        self.second_stage = True
-
-        # close mosaic augmentation
-        if self.train_loader.dataset.mosaic_prob > 0.:
-            print(' - Close < Mosaic Augmentation > ...')
-            self.train_loader.dataset.mosaic_prob = 0.
-            self.heavy_eval = True
-
-        # close mixup augmentation
-        if self.train_loader.dataset.mixup_prob > 0.:
-            print(' - Close < Mixup Augmentation > ...')
-            self.train_loader.dataset.mixup_prob = 0.
-            self.heavy_eval = True
-
-        # close rotation augmentation
-        if 'degrees' in self.trans_cfg.keys() and self.trans_cfg['degrees'] > 0.0:
-            print(' - Close < degrees of rotation > ...')
-            self.trans_cfg['degrees'] = 0.0
-        if 'shear' in self.trans_cfg.keys() and self.trans_cfg['shear'] > 0.0:
-            print(' - Close < shear > ...')
-            self.trans_cfg['shear'] = 0.0
-        if 'perspective' in self.trans_cfg.keys() and self.trans_cfg['perspective'] > 0.0:
-            print(' - Close < perspective > ...')
-            self.trans_cfg['perspective'] = 0.0
-
-        # build a new transform for second stage
-        print(' - Rebuild transforms ...')
-        self.train_transform, self.trans_cfg = build_transform(
-            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        self.train_loader.dataset.transform = self.train_transform
-        
-    def check_third_stage(self):
-        # set third stage
-        print('============== Third stage of Training ==============')
-        self.third_stage = True
-
-        # close random affine
-        if 'translate' in self.trans_cfg.keys() and self.trans_cfg['translate'] > 0.0:
-            print(' - Close < translate of affine > ...')
-            self.trans_cfg['translate'] = 0.0
-        if 'scale' in self.trans_cfg.keys():
-            print(' - Close < scale of affine > ...')
-            self.trans_cfg['scale'] = [1.0, 1.0]
-
-        # build a new transform for the third stage
-        print(' - Rebuild transforms ...')
-        self.train_transform, self.trans_cfg = build_transform(
-            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        self.train_loader.dataset.transform = self.train_transform
-
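
The two `check_*_stage` hooks fire a fixed number of epochs before the end of training: mosaic/mixup and rotation shut off first, then translation and scaling. A small sketch of the schedule arithmetic (300/20/10 are example values, not the repo defaults):

def training_stage(epoch, max_epoch, second=20, third=10):
    # Mirrors the checks in train(); 20/10 are illustrative stage lengths.
    if epoch >= max_epoch - third - 1:
        return 'third'   # strong and weak geometric augmentations both off
    if epoch >= max_epoch - second - 1:
        return 'second'  # mosaic / mixup / rotation off
    return 'first'

assert training_stage(278, 300) == 'first'
assert training_stage(279, 300) == 'second'
assert training_stage(289, 300) == 'third'
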
-## Real-time DETR Trainer
-class RTDetrTrainer(object):
-    def __init__(self, args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size):
-        # ------------------- Basic parameters -------------------
-        self.args = args
-        self.epoch = 0
-        self.best_map = -1.
-        self.device = device
-        self.criterion = criterion
-        self.world_size = world_size
-        self.grad_accumulate = args.grad_accumulate
-        self.clip_grad = 0.1
-        self.heavy_eval = False
-        self.normalize_bbox = True
-        # close AMP for RT-DETR
-        self.args.fp16 = False
-        # weak augmentation stage
-        self.second_stage = False
-        self.second_stage_epoch = -1
-        # path to save model
-        self.path_to_save = os.path.join(args.save_folder, args.dataset, args.model)
-        os.makedirs(self.path_to_save, exist_ok=True)
-
-        # ---------------------------- Hyperparameters refer to RTMDet ----------------------------
-        self.optimizer_dict = {'optimizer': 'adamw', 'momentum': None, 'weight_decay': 0.0001, 'lr0': 0.0001, 'backbone_lr_ratio': 0.1}
-        self.warmup_dict = {'warmup': 'linear', 'warmup_iters': 2000, 'warmup_factor': 0.00066667}
-        self.lr_schedule_dict = {'lr_scheduler': 'step', 'lr_epoch': [self.args.max_epoch // 12 * 11]}
-        self.ema_dict = {'ema_decay': 0.9999, 'ema_tau': 2000}
-
-        # ---------------------------- Build Dataset & Model & Trans. Config ----------------------------
-        self.data_cfg  = data_cfg
-        self.model_cfg = model_cfg
-        self.trans_cfg = trans_cfg
-
-        # ---------------------------- Build Transform ----------------------------
-        self.train_transform, self.trans_cfg = build_transform(
-            args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        self.val_transform, _ = build_transform(
-            args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=False)
-        if self.trans_cfg["mosaic_prob"] > 0:
-            self.second_stage_epoch = 5
-
-        # ---------------------------- Build Dataset & Dataloader ----------------------------
-        self.dataset, self.dataset_info = build_dataset(args, self.data_cfg, self.trans_cfg, self.train_transform, is_train=True)
-        self.train_loader = build_dataloader(args, self.dataset, self.args.batch_size // self.world_size, CollateFunc())
-
-        # ---------------------------- Build Evaluator ----------------------------
-        self.evaluator = build_evluator(args, self.data_cfg, self.val_transform, self.device)
-
-        # ---------------------------- Build Grad. Scaler ----------------------------
-        self.scaler = torch.cuda.amp.GradScaler(enabled=self.args.fp16)
-
-        # ---------------------------- Build Optimizer ----------------------------
-        self.optimizer_dict['lr0'] *= self.args.batch_size / 16.  # auto lr scaling
-        self.optimizer, self.start_epoch = build_rtdetr_optimizer(self.optimizer_dict, model, self.args.resume)
-
-        # ---------------------------- Build LR Scheduler ----------------------------
-        self.wp_lr_scheduler = build_wp_lr_scheduler(self.warmup_dict, self.optimizer_dict['lr0'])
-        self.lr_scheduler    = build_lr_scheduler(self.lr_schedule_dict, self.optimizer, args.resume)
-
-        # ---------------------------- Build Model-EMA ----------------------------
-        if self.args.ema and distributed_utils.get_rank() in [-1, 0]:
-            print('Build ModelEMA ...')
-            self.model_ema = ModelEMA(self.ema_dict, model, self.start_epoch * len(self.train_loader))
-        else:
-            self.model_ema = None
-
-    def train(self, model):
-        for epoch in range(self.start_epoch, self.args.max_epoch):
-            if self.args.distributed:
-                self.train_loader.batch_sampler.sampler.set_epoch(epoch)
-
-            # check second stage
-            if epoch >= (self.args.max_epoch - self.second_stage_epoch - 1) and not self.second_stage:
-                self.check_second_stage()
-                # save model of the last mosaic epoch
-                weight_name = '{}_last_mosaic_epoch.pth'.format(self.args.model)
-                checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                print('Saving state of the last Mosaic epoch-{}.'.format(self.epoch))
-                torch.save({'model': model.state_dict(),
-                            'mAP': round(self.evaluator.map*100, 1),
-                            'optimizer': self.optimizer.state_dict(),
-                            'epoch': self.epoch,
-                            'args': self.args}, 
-                            checkpoint_path)
-
-            # train one epoch
-            self.epoch = epoch
-            self.train_one_epoch(model)
-
-            # eval one epoch
-            if self.heavy_eval:
-                model_eval = model.module if self.args.distributed else model
-                self.eval(model_eval)
-            else:
-                model_eval = model.module if self.args.distributed else model
-                if (epoch % self.args.eval_epoch) == 0 or (epoch == self.args.max_epoch - 1):
-                    self.eval(model_eval)
-
-            if self.args.debug:
-                print("For debug mode, we only train 1 epoch")
-                break
-
-    def eval(self, model):
-        # check model (use the EMA weights if available)
-        model_eval = model if self.model_ema is None else self.model_ema.ema
-
-        if distributed_utils.is_main_process():
-            # check evaluator
-            if self.evaluator is None:
-                print('No evaluator ... save model and go on training.')
-                print('Saving state, epoch: {}'.format(self.epoch))
-                weight_name = '{}_no_eval.pth'.format(self.args.model)
-                checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                torch.save({'model': model_eval.state_dict(),
-                            'mAP': -1.,
-                            'optimizer': self.optimizer.state_dict(),
-                            'epoch': self.epoch,
-                            'args': self.args}, 
-                            checkpoint_path)               
-            else:
-                print('eval ...')
-                # set eval mode
-                model_eval.eval()
-
-                # evaluate
-                with torch.no_grad():
-                    self.evaluator.evaluate(model_eval)
-
-                # save model
-                cur_map = self.evaluator.map
-                if cur_map > self.best_map:
-                    # update best-map
-                    self.best_map = cur_map
-                    # save model
-                    print('Saving state, epoch:', self.epoch)
-                    weight_name = '{}_best.pth'.format(self.args.model)
-                    checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                    torch.save({'model': model_eval.state_dict(),
-                                'mAP': round(self.best_map*100, 1),
-                                'optimizer': self.optimizer.state_dict(),
-                                'epoch': self.epoch,
-                                'args': self.args}, 
-                                checkpoint_path)                      
-
-                # set train mode.
-                model_eval.train()
-
-        if self.args.distributed:
-            # wait for all processes to synchronize
-            dist.barrier()
-
-    def train_one_epoch(self, model):
-        metric_logger = MetricLogger(delimiter="  ")
-        metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
-        metric_logger.add_meter('size', SmoothedValue(window_size=1, fmt='{value:d}'))
-        metric_logger.add_meter('grad_norm', SmoothedValue(window_size=1, fmt='{value:.1f}'))
-        header = 'Epoch: [{} / {}]'.format(self.epoch, self.args.max_epoch)
-        epoch_size = len(self.train_loader)
-        print_freq = 10
-
-        # basic parameters
-        img_size = self.args.img_size
-        nw = self.warmup_dict['warmup_iters']
-        lr_warmup_stage = True
-
-        # Train one epoch
-        for iter_i, (images, targets) in enumerate(metric_logger.log_every(self.train_loader, print_freq, header)):
-            ni = iter_i + self.epoch * epoch_size
-            # WarmUp
-            if ni < nw and lr_warmup_stage:
-                self.wp_lr_scheduler(ni, self.optimizer)
-            elif ni == nw and lr_warmup_stage:
-                print('Warmup stage is over.')
-                lr_warmup_stage = False
-                self.wp_lr_scheduler.set_lr(self.optimizer, self.optimizer_dict['lr0'], self.optimizer_dict['lr0'])
-                                            
-            # To device
-            images = images.to(self.device, non_blocking=True).float()
-            for tgt in targets:
-                tgt['boxes'] = tgt['boxes'].to(self.device)
-                tgt['labels'] = tgt['labels'].to(self.device)
-
-            # Multi scale
-            if self.args.multi_scale:
-                images, targets, img_size = self.rescale_image_targets(
-                    images, targets, self.model_cfg['max_stride'], self.args.min_box_size, self.model_cfg['multi_scale'])
-            else:
-                targets = self.refine_targets(img_size, targets, self.args.min_box_size)
-
-            # xyxy -> cxcywh
-            targets = self.box_xyxy_to_cxcywh(targets)
-                
-            # Visualize train targets
-            if self.args.vis_tgt:
-                targets = self.box_cxcywh_to_xyxy(targets)
-                vis_data(images, targets, normalized_bbox=self.normalize_bbox,
-                         pixel_mean=self.trans_cfg['pixel_mean'], pixel_std=self.trans_cfg['pixel_std'])
-                targets = self.box_xyxy_to_cxcywh(targets)
-
-            # Inference
-            with torch.cuda.amp.autocast(enabled=self.args.fp16):
-                outputs = model(images, targets)    
-                loss_dict = self.criterion(outputs, targets)
-                losses = sum(loss_dict.values())
-                # Grad Accumulate
-                if self.grad_accumulate > 1:
-                    losses /= self.grad_accumulate
-                loss_dict_reduced = distributed_utils.reduce_dict(loss_dict)
-
-            # Backward
-            self.scaler.scale(losses).backward()
-
-            # Optimize
-            if ni % self.grad_accumulate == 0:
-                grad_norm = None
-                if self.clip_grad > 0:
-                    # unscale gradients
-                    self.scaler.unscale_(self.optimizer)
-                    # clip gradients
-                    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=self.clip_grad)
-                # optimizer.step
-                self.scaler.step(self.optimizer)
-                self.scaler.update()
-                self.optimizer.zero_grad()
-                # ema
-                if self.model_ema is not None:
-                    self.model_ema.update(model)
-
-            # Update log
-            metric_logger.update(loss=losses.item(), **loss_dict_reduced)
-            metric_logger.update(lr=self.optimizer.param_groups[0]["lr"])
-            metric_logger.update(grad_norm=grad_norm)
-            metric_logger.update(size=img_size)
-
-            if self.args.debug:
-                print("For debug mode, we only train 1 iteration")
-                break
-    
-        # LR Scheduler
-        self.lr_scheduler.step()
-        
-    def refine_targets(self, img_size, targets, min_box_size):
-        # filter out tiny targets and optionally normalize the boxes
-        for tgt in targets:
-            boxes = tgt["boxes"].clone()
-            labels = tgt["labels"].clone()
-            # refine tgt
-            tgt_boxes_wh = boxes[..., 2:] - boxes[..., :2]
-            min_tgt_size = torch.min(tgt_boxes_wh, dim=-1)[0]
-            keep = (min_tgt_size >= min_box_size)
-            if self.normalize_bbox:
-                # normalize box
-                boxes[:, [0, 2]] = boxes[:, [0, 2]] / img_size
-                boxes[:, [1, 3]] = boxes[:, [1, 3]] / img_size
-
-            tgt["boxes"] = boxes[keep]
-            tgt["labels"] = labels[keep]
-        
-        return targets
-
-    def rescale_image_targets(self, images, targets, stride, min_box_size, multi_scale_range=[0.5, 1.5]):
-        """
-            Deployed for Multi scale trick.
-        """
-        if isinstance(stride, int):
-            max_stride = stride
-        elif isinstance(stride, list):
-            max_stride = max(stride)
-
-        # During training phase, the shape of input image is square.
-        old_img_size = images.shape[-1]
-        # randrange needs integer bounds; the scale-range endpoints are floats
-        new_img_size = random.randrange(int(old_img_size * multi_scale_range[0]), int(old_img_size * multi_scale_range[1]) + max_stride)
-        new_img_size = new_img_size // max_stride * max_stride  # snap to a multiple of max stride
-        if new_img_size / old_img_size != 1:
-            # interpolate
-            images = torch.nn.functional.interpolate(
-                                input=images, 
-                                size=new_img_size, 
-                                mode='bilinear', 
-                                align_corners=False)
-        # rescale targets
-        for tgt in targets:
-            boxes = tgt["boxes"].clone()
-            labels = tgt["labels"].clone()
-            boxes = torch.clamp(boxes, 0, old_img_size)
-            # rescale box
-            boxes[:, [0, 2]] = boxes[:, [0, 2]] / old_img_size * new_img_size
-            boxes[:, [1, 3]] = boxes[:, [1, 3]] / old_img_size * new_img_size
-            # refine tgt
-            tgt_boxes_wh = boxes[..., 2:] - boxes[..., :2]
-            min_tgt_size = torch.min(tgt_boxes_wh, dim=-1)[0]
-            keep = (min_tgt_size >= min_box_size)
-            if self.normalize_bbox:
-                # normalize box
-                boxes[:, [0, 2]] = boxes[:, [0, 2]] / new_img_size
-                boxes[:, [1, 3]] = boxes[:, [1, 3]] / new_img_size
-
-            tgt["boxes"] = boxes[keep]
-            tgt["labels"] = labels[keep]
-
-        return images, targets, new_img_size
-
-    def box_xyxy_to_cxcywh(self, targets):
-        # convert target boxes from xyxy to cxcywh
-        for tgt in targets:
-            boxes_xyxy = tgt["boxes"].clone()
-            # (x1, y1, x2, y2) -> (cx, cy, w, h)
-            cxcy = (boxes_xyxy[..., :2] + boxes_xyxy[..., 2:]) * 0.5
-            bwbh = boxes_xyxy[..., 2:] - boxes_xyxy[..., :2]
-            boxes_cxcywh = torch.cat([cxcy, bwbh], dim=-1)
-
-            tgt["boxes"] = boxes_cxcywh
-
-        return targets
-
-    def box_cxcywh_to_xyxy(self, targets):
-        # convert target boxes from cxcywh back to xyxy
-        for tgt in targets:
-            boxes_cxcywh = tgt["boxes"].clone()
-            # (cx, cy, w, h) -> (x1, y1, x2, y2)
-            x1y1 = boxes_cxcywh[..., :2] - boxes_cxcywh[..., 2:] * 0.5
-            x2y2 = boxes_cxcywh[..., :2] + boxes_cxcywh[..., 2:] * 0.5
-            boxes_xyxy = torch.cat([x1y1, x2y2], dim=-1)
-
-            tgt["boxes"] = boxes_xyxy
-
-        return targets
-
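
The two helpers above are pure box-format conversions and exact inverses of each other; a quick round-trip sanity check on plain tensors:

import torch

def xyxy_to_cxcywh(b):
    # (x1, y1, x2, y2) -> (cx, cy, w, h)
    return torch.cat([(b[..., :2] + b[..., 2:]) * 0.5,
                      b[..., 2:] - b[..., :2]], dim=-1)

def cxcywh_to_xyxy(b):
    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    return torch.cat([b[..., :2] - b[..., 2:] * 0.5,
                      b[..., :2] + b[..., 2:] * 0.5], dim=-1)

b = torch.tensor([[10., 20., 110., 220.]])
assert torch.allclose(cxcywh_to_xyxy(xyxy_to_cxcywh(b)), b)
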
-    def check_second_stage(self):
-        # set second stage
-        print('============== Second stage of Training ==============')
-        self.second_stage = True
-
-        # close mosaic augmentation
-        if self.train_loader.dataset.mosaic_prob > 0.:
-            print(' - Close < Mosaic Augmentation > ...')
-            self.train_loader.dataset.mosaic_prob = 0.
-            self.heavy_eval = True
-
-        # close mixup augmentation
-        if self.train_loader.dataset.mixup_prob > 0.:
-            print(' - Close < Mixup Augmentation > ...')
-            self.train_loader.dataset.mixup_prob = 0.
-            self.heavy_eval = True
-
-        # close rotation augmentation
-        if 'degrees' in self.trans_cfg.keys() and self.trans_cfg['degrees'] > 0.0:
-            print(' - Close < degrees of rotation > ...')
-            self.trans_cfg['degrees'] = 0.0
-        if 'shear' in self.trans_cfg.keys() and self.trans_cfg['shear'] > 0.0:
-            print(' - Close < shear > ...')
-            self.trans_cfg['shear'] = 0.0
-        if 'perspective' in self.trans_cfg.keys() and self.trans_cfg['perspective'] > 0.0:
-            print(' - Close < perspective > ...')
-            self.trans_cfg['perspective'] = 0.0
+        # close random affine
+        if 'translate' in self.trans_cfg.keys() and self.trans_cfg['translate'] > 0.0:
+            print(' - Close < translate of affine > ...')
+            self.trans_cfg['translate'] = 0.0
+        if 'scale' in self.trans_cfg.keys():
+            print(' - Close < scale of affine > ...')
+            self.trans_cfg['scale'] = [1.0, 1.0]
 
         # build a new transform for second stage
         print(' - Rebuild transforms ...')
         self.train_transform, self.trans_cfg = build_transform(
             args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        
         self.train_loader.dataset.transform = self.train_transform
 
 
 # Build Trainer
 def build_trainer(args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size):
     # ----------------------- Det trainers -----------------------
-    if   model_cfg['trainer_type'] == 'yolov8':
-        return Yolov8Trainer(args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size)
+    if   model_cfg['trainer_type'] == 'yolo':
+        return RTCTrainer(args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size)
     elif model_cfg['trainer_type'] == 'yolox':
         return YoloxTrainer(args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size)
-    elif model_cfg['trainer_type'] == 'rtcdet':
-        return RTCTrainer(args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size)
-    elif model_cfg['trainer_type'] == 'rtdetr':
-        return RTDetrTrainer(args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size)
     else:
         raise NotImplementedError(model_cfg['trainer_type'])
     

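One note on the warmup logic in the trainers above: the YOLO-style branch interpolates every param group's lr with `np.interp` over the first `wp_epoch` epochs — the bias group decays from `warmup_bias_lr` down to the scheduled lr while all other groups rise from zero. A minimal restatement of that curve (the 0.1 and 0.01 values are illustrative):

import numpy as np

def warmup_lr(ni, nw, group_idx, scheduled_lr, warmup_bias_lr=0.1):
    # Bias group (index 0) falls from warmup_bias_lr; others rise from 0.
    start = warmup_bias_lr if group_idx == 0 else 0.0
    return float(np.interp(ni, [0, nw], [start, scheduled_lr]))

print(warmup_lr(0, 1000, 0, 0.01))    # 0.1   (bias group, first iteration)
print(warmup_lr(500, 1000, 2, 0.01))  # 0.005 (other groups, halfway point)
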
+ 1 - 53
utils/solver/optimizer.py

@@ -2,7 +2,7 @@ import torch
 import torch.nn as nn
 
 
-def build_yolo_optimizer(cfg, model, resume=None):
+def build_optimizer(cfg, model, resume=None):
     print('==============================')
     print('Optimizer: {}'.format(cfg['optimizer']))
     print('--base lr: {}'.format(cfg['lr0']))
@@ -42,55 +42,3 @@ def build_yolo_optimizer(cfg, model, resume=None):
         del checkpoint, checkpoint_state_dict
                                                         
     return optimizer, start_epoch
-
-
-def build_rtdetr_optimizer(cfg, model, resume=None):
-    print('==============================')
-    print('Optimizer: {}'.format(cfg['optimizer']))
-    print('--base lr: {}'.format(cfg['lr0']))
-    print('--weight_decay: {}'.format(cfg['weight_decay']))
-
-    # ------------- Divide model's parameters -------------
-    param_dicts = [], [], [], [], [], []
-    norm_names = ["norm"] + ["norm{}".format(i) for i in range(10000)]
-    for n, p in model.named_parameters():
-        # Non-Backbone's learnable parameters
-        if "backbone" not in n and p.requires_grad:
-            if "bias" == n.split(".")[-1]:
-                param_dicts[0].append(p)      # no weight decay for all layers' bias
-            else:
-                if n.split(".")[-2] in norm_names:
-                    param_dicts[1].append(p)  # no weight decay for all NormLayers' weight
-                else:
-                    param_dicts[2].append(p)  # weight decay for all Non-NormLayers' weight
-        # Backbone's learnable parameters
-        elif "backbone" in n and p.requires_grad:
-            if "bias" == n.split(".")[-1]:
-                param_dicts[3].append(p)      # no weight decay for all layers' bias
-            else:
-                if n.split(".")[-2] in norm_names:
-                    param_dicts[4].append(p)  # no weight decay for all NormLayers' weight
-                else:
-                    param_dicts[5].append(p)  # weight decay for all Non-NormLayers' weight
-
-    # Non-Backbone's learnable parameters
-    optimizer = torch.optim.AdamW(param_dicts[0], lr=cfg['lr0'], weight_decay=0.0)
-    optimizer.add_param_group({"params": param_dicts[1], "weight_decay": 0.0})
-    optimizer.add_param_group({"params": param_dicts[2], "weight_decay": cfg['weight_decay']})
-
-    # Backbone's learnable parameters
-    backbone_lr = cfg['lr0'] * cfg['backbone_lr_ratio']
-    optimizer.add_param_group({"params": param_dicts[3], "lr": backbone_lr, "weight_decay": 0.0})
-    optimizer.add_param_group({"params": param_dicts[4], "lr": backbone_lr, "weight_decay": 0.0})
-    optimizer.add_param_group({"params": param_dicts[5], "lr": backbone_lr, "weight_decay": cfg['weight_decay']})
-
-    start_epoch = 0
-    if resume and resume != 'None':
-        print('keep training: ', resume)
-        checkpoint = torch.load(resume)
-        # checkpoint state dict
-        checkpoint_state_dict = checkpoint.pop("optimizer")
-        optimizer.load_state_dict(checkpoint_state_dict)
-        start_epoch = checkpoint.pop("epoch") + 1
-                                                        
-    return optimizer, start_epoch
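
The deleted `build_rtdetr_optimizer` split parameters so that biases and normalization weights receive no weight decay, with a reduced lr for the backbone. A condensed sketch of the same grouping (a hypothetical rewrite, not the repo's code; `p.ndim <= 1` stands in for the original norm-name matching — every bias and norm weight has at most one dimension, so both land in the no-decay groups):

import torch

def make_adamw(model, lr0=1e-4, weight_decay=1e-4, backbone_lr_ratio=0.1):
    # Bucket parameters by (backbone?, decay?); empty groups are harmless.
    groups = {k: [] for k in ('decay', 'no_decay', 'bb_decay', 'bb_no_decay')}
    for n, p in model.named_parameters():
        if not p.requires_grad:
            continue
        key = ('bb_' if 'backbone' in n else '') + \
              ('no_decay' if p.ndim <= 1 else 'decay')
        groups[key].append(p)
    bb_lr = lr0 * backbone_lr_ratio
    return torch.optim.AdamW([
        {'params': groups['decay'],       'weight_decay': weight_decay},
        {'params': groups['no_decay'],    'weight_decay': 0.0},
        {'params': groups['bb_decay'],    'lr': bb_lr, 'weight_decay': weight_decay},
        {'params': groups['bb_no_decay'], 'lr': bb_lr, 'weight_decay': 0.0},
    ], lr=lr0)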