
Optimize code

yjh0410 1 year ago
parent
commit 9dbbba8664

+ 23 - 30
config/__init__.py

@@ -16,13 +16,15 @@ def build_dataset_config(args):
 
 # ------------------ Transform Config ------------------
 from .data_config.transform_config import (
+    # SSD-Style
+    ssd_trans_config,
     # YOLOv5-Style
-    yolov5_p_trans_config,
-    yolov5_n_trans_config,
-    yolov5_s_trans_config,
-    yolov5_m_trans_config,
-    yolov5_l_trans_config,
-    yolov5_x_trans_config,
+    yolo_p_trans_config,
+    yolo_n_trans_config,
+    yolo_s_trans_config,
+    yolo_m_trans_config,
+    yolo_l_trans_config,
+    yolo_x_trans_config,
     # YOLOX-Style
     yolox_p_trans_config,
     yolox_n_trans_config,
@@ -30,10 +32,6 @@ from .data_config.transform_config import (
     yolox_m_trans_config,
     yolox_l_trans_config,
     yolox_x_trans_config,
-    # SSD-Style
-    ssd_trans_config,
-    # RT-DETR style
-    rtdetr_base_trans_config,
 )
 
 def build_trans_config(trans_config='ssd'):
@@ -45,18 +43,18 @@ def build_trans_config(trans_config='ssd'):
         cfg = ssd_trans_config
 
     # YOLOv5-style transform 
-    elif trans_config == 'yolov5_p':
-        cfg = yolov5_p_trans_config
-    elif trans_config == 'yolov5_n':
-        cfg = yolov5_n_trans_config
-    elif trans_config == 'yolov5_s':
-        cfg = yolov5_s_trans_config
-    elif trans_config == 'yolov5_m':
-        cfg = yolov5_m_trans_config
-    elif trans_config == 'yolov5_l':
-        cfg = yolov5_l_trans_config
-    elif trans_config == 'yolov5_x':
-        cfg = yolov5_x_trans_config
+    elif trans_config == 'yolo_p':
+        cfg = yolo_p_trans_config
+    elif trans_config == 'yolo_n':
+        cfg = yolo_n_trans_config
+    elif trans_config == 'yolo_s':
+        cfg = yolo_s_trans_config
+    elif trans_config == 'yolo_m':
+        cfg = yolo_m_trans_config
+    elif trans_config == 'yolo_l':
+        cfg = yolo_l_trans_config
+    elif trans_config == 'yolo_x':
+        cfg = yolo_x_trans_config
         
     # YOLOX-style transform 
     elif trans_config == 'yolox_p':
@@ -72,10 +70,8 @@ def build_trans_config(trans_config='ssd'):
     elif trans_config == 'yolox_x':
         cfg = yolox_x_trans_config
 
-    # RT-DETR style
-    elif trans_config == 'rtdetr_base':
-        cfg = rtdetr_base_trans_config
-
+    else:
+        raise NotImplementedError("Unknown transform config: {}".format(trans_config))
     print('Transform Config: {} \n'.format(cfg))
 
     return cfg
@@ -87,7 +83,7 @@ from .model_config.yolov1_config import yolov1_cfg
 from .model_config.yolov2_config import yolov2_cfg
 from .model_config.yolov3_config import yolov3_cfg
 from .model_config.yolov4_config import yolov4_cfg
-from .model_config.yolov5_config import yolov5_cfg, yolov5_adamw_cfg
+from .model_config.yolov5_config import yolov5_cfg
 from .model_config.yolov7_config import yolov7_cfg
 from .model_config.yolov8_config import yolov8_cfg
 from .model_config.yolox_config  import yolox_cfg
@@ -110,9 +106,6 @@ def build_model_config(args):
     # YOLOv5
     elif args.model in ['yolov5_n', 'yolov5_s', 'yolov5_m', 'yolov5_l', 'yolov5_x']:
         cfg = yolov5_cfg[args.model]
-    # YOLOv5-AdamW
-    elif args.model in ['yolov5_n_adamw', 'yolov5_s_adamw', 'yolov5_m_adamw', 'yolov5_l_adamw', 'yolov5_x_adamw']:
-        cfg = yolov5_adamw_cfg[args.model]
     # YOLOv7
     elif args.model in ['yolov7_tiny', 'yolov7', 'yolov7_x']:
         cfg = yolov7_cfg[args.model]

+ 31 - 71
config/data_config/transform_config.py

@@ -1,9 +1,22 @@
 # Transform config
 
 
+# ----------------------- SSD-Style Transform -----------------------
+ssd_trans_config = {
+    'aug_type': 'ssd',
+    'use_ablu': False,
+    # Mosaic & Mixup are not used for SSD-style augmentation
+    'mosaic_prob': 0.0,
+    'mixup_prob':  0.0,
+    'mosaic_type': 'yolov5',
+    'mixup_type':  'yolov5',
+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
+}
+
+
 # ----------------------- YOLOv5-Style Transform -----------------------
-yolov5_x_trans_config = {
-    'aug_type': 'yolov5',
+yolo_x_trans_config = {
+    'aug_type': 'yolo',
     'use_ablu': True,
     # Basic Augment
     'affine_params': {
@@ -17,7 +30,6 @@ yolov5_x_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  0.2,
     'mosaic_type': 'yolov5',
@@ -25,8 +37,8 @@ yolov5_x_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
 }
 
-yolov5_l_trans_config = {
-    'aug_type': 'yolov5',
+yolo_l_trans_config = {
+    'aug_type': 'yolo',
     'use_ablu': True,
     # Basic Augment
     'affine_params': {
@@ -40,7 +52,6 @@ yolov5_l_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  0.15,
     'mosaic_type': 'yolov5',
@@ -48,8 +59,8 @@ yolov5_l_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
 }
 
-yolov5_m_trans_config = {
-    'aug_type': 'yolov5',
+yolo_m_trans_config = {
+    'aug_type': 'yolo',
     'use_ablu': True,
     # Basic Augment
     'affine_params': {
@@ -63,7 +74,6 @@ yolov5_m_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  0.10,
     'mosaic_type': 'yolov5',
@@ -71,8 +81,8 @@ yolov5_m_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
 }
 
-yolov5_s_trans_config = {
-    'aug_type': 'yolov5',
+yolo_s_trans_config = {
+    'aug_type': 'yolo',
     'use_ablu': True,
     # Basic Augment
     'affine_params': {
@@ -86,7 +96,6 @@ yolov5_s_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  0.0,
     'mosaic_type': 'yolov5',
@@ -94,8 +103,8 @@ yolov5_s_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
 }
 
-yolov5_n_trans_config = {
-    'aug_type': 'yolov5',
+yolo_n_trans_config = {
+    'aug_type': 'yolo',
     'use_ablu': True,
     # Basic Augment
     'affine_params': {
@@ -109,7 +118,6 @@ yolov5_n_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  0.0,
     'mosaic_type': 'yolov5',
@@ -117,8 +125,8 @@ yolov5_n_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
 }
 
-yolov5_p_trans_config = {
-    'aug_type': 'yolov5',
+yolo_p_trans_config = {
+    'aug_type': 'yolo',
     'use_ablu': True,
     # Basic Augment
     'affine_params': {
@@ -132,7 +140,6 @@ yolov5_p_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 0.5,
     'mixup_prob':  0.0,
     'mosaic_type': 'yolov5',
@@ -143,7 +150,7 @@ yolov5_p_trans_config = {
 
 # ----------------------- YOLOX-Style Transform -----------------------
 yolox_x_trans_config = {
-    'aug_type': 'yolov5',
+    'aug_type': 'yolo',
     'use_ablu': False,
     # Basic Augment
     'affine_params': {
@@ -157,7 +164,6 @@ yolox_x_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  1.0,
     'mosaic_type': 'yolov5',
@@ -166,7 +172,7 @@ yolox_x_trans_config = {
 }
 
 yolox_l_trans_config = {
-    'aug_type': 'yolov5',
+    'aug_type': 'yolo',
     'use_ablu': False,
     # Basic Augment
     'affine_params': {
@@ -180,7 +186,6 @@ yolox_l_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  1.0,
     'mosaic_type': 'yolov5',
@@ -189,7 +194,7 @@ yolox_l_trans_config = {
 }
 
 yolox_m_trans_config = {
-    'aug_type': 'yolov5',
+    'aug_type': 'yolo',
     'use_ablu': False,
     # Basic Augment
     'affine_params': {
@@ -203,7 +208,6 @@ yolox_m_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  1.0,
     'mosaic_type': 'yolov5',
@@ -212,7 +216,7 @@ yolox_m_trans_config = {
 }
 
 yolox_s_trans_config = {
-    'aug_type': 'yolov5',
+    'aug_type': 'yolo',
     'use_ablu': False,
     # Basic Augment
     'affine_params': {
@@ -226,7 +230,6 @@ yolox_s_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  1.0,
     'mosaic_type': 'yolov5',
@@ -235,7 +238,7 @@ yolox_s_trans_config = {
 }
 
 yolox_n_trans_config = {
-    'aug_type': 'yolov5',
+    'aug_type': 'yolo',
     'use_ablu': False,
     # Basic Augment
     'affine_params': {
@@ -249,7 +252,6 @@ yolox_n_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 1.0,
     'mixup_prob':  0.5,
     'mosaic_type': 'yolov5',
@@ -258,7 +260,7 @@ yolox_n_trans_config = {
 }
 
 yolox_p_trans_config = {
-    'aug_type': 'yolov5',
+    'aug_type': 'yolo',
     'use_ablu': False,
     # Basic Augment
     'affine_params': {
@@ -272,51 +274,9 @@ yolox_p_trans_config = {
         'hsv_v': 0.4,
     },
     # Mosaic & Mixup
-    'mosaic_keep_ratio': True,
     'mosaic_prob': 0.5,
     'mixup_prob':  0.0,
     'mosaic_type': 'yolov5',
     'mixup_type':  'yolox',
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
 }
-
-
-# ----------------------- SSD-Style Transform -----------------------
-ssd_trans_config = {
-    'aug_type': 'ssd',
-    'use_ablu': False,
-    # Mosaic & Mixup are not used for SSD-style augmentation
-    'mosaic_keep_ratio': False,
-    'mosaic_prob': 0.0,
-    'mixup_prob':  0.0,
-    'mosaic_type': 'yolov5',
-    'mixup_type':  'yolov5',
-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
-}
-
-
-# ----------------------- SSD-Style Transform -----------------------
-rtdetr_base_trans_config = {
-    'aug_type': 'rtdetr',
-    'use_ablu': True,
-    'pixel_mean': [123.675, 116.28, 103.53],  # IN-1K statistics
-    'pixel_std':  [58.395, 57.12, 57.375],    # IN-1K statistics
-    # Basic Augment
-    'affine_params': {
-        'degrees': 0.0,
-        'translate': 0.2,
-        'scale': [0.1, 2.0],
-        'shear': 0.0,
-        'perspective': 0.0,
-        'hsv_h': 0.015,
-        'hsv_s': 0.7,
-        'hsv_v': 0.4,
-    },
-    # Mosaic & Mixup
-    'mosaic_keep_ratio': False,
-    'mosaic_prob': 0.0,
-    'mixup_prob':  0.0,
-    'mosaic_type': 'yolov5',
-    'mixup_type':  'yolov5',
-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
-}
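
After the reshuffle, the SSD config leads the file and every YOLOv5/YOLOX-style dict shares 'aug_type': 'yolo', with the 'mosaic_keep_ratio' key removed throughout. A quick sanity check over three representatives (a sketch; run from the repo root):

from config.data_config.transform_config import (
    ssd_trans_config, yolo_s_trans_config, yolox_s_trans_config)

for name, cfg in [('ssd',     ssd_trans_config),
                  ('yolo_s',  yolo_s_trans_config),
                  ('yolox_s', yolox_s_trans_config)]:
    assert 'mosaic_keep_ratio' not in cfg      # key dropped in this commit
    print(name, cfg['aug_type'], cfg['mosaic_prob'], cfg['mixup_prob'])
# ssd     ssd  0.0 0.0
# yolo_s  yolo 1.0 0.0
# yolox_s yolo 1.0 1.0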

+ 13 - 11
config/model_config/yolov1_config.py

@@ -1,32 +1,34 @@
 # YOLOv1 Config
 
 yolov1_cfg = {
-    # input
-    'trans_type': 'ssd',
-    'multi_scale': [0.5, 1.5],
-    # model
+    # ---------------- Model config ----------------
+    ## Backbone
     'backbone': 'resnet18',
     'pretrained': True,
     'stride': 32,  # P5
     'max_stride': 32,
-    # neck
+    ## Neck
     'neck': 'sppf',
-    'expand_ratio': 0.5,
-    'pooling_size': 5,
     'neck_act': 'lrelu',
     'neck_norm': 'BN',
     'neck_depthwise': False,
-    # head
+    'expand_ratio': 0.5,
+    'pooling_size': 5,
+    ## Head
     'head': 'decoupled_head',
     'head_act': 'lrelu',
     'head_norm': 'BN',
     'num_cls_head': 2,
     'num_reg_head': 2,
     'head_depthwise': False,
-    # loss weight
+    # ---------------- Data process config ----------------
+    ## Input
+    'multi_scale': [0.5, 1.5], # 320 -> 960
+    'trans_type': 'ssd',
+    # ---------------- Loss config ----------------
     'loss_obj_weight': 1.0,
     'loss_cls_weight': 1.0,
     'loss_box_weight': 5.0,
-    # training configuration
-    'trainer_type': 'yolov8',
+    # ---------------- Trainer config ----------------
+    'trainer_type': 'yolo',
 }
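
The new '# 320 -> 960' comments read multi_scale as fractions of a 640 base resolution. A worked check, plus one plausible sampling step rounded to max_stride (the base size and the sampling scheme are assumptions, not part of this diff):

import random
from config.model_config.yolov1_config import yolov1_cfg

img_size   = 640                            # assumed base input size
max_stride = yolov1_cfg['max_stride']       # 32
lo, hi     = yolov1_cfg['multi_scale']      # [0.5, 1.5]
print(int(lo * img_size), int(hi * img_size))        # 320 960

new_size = random.randint(int(lo * img_size),
                          int(hi * img_size)) // max_stride * max_stride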

+ 15 - 13
config/model_config/yolov2_config.py

@@ -1,22 +1,20 @@
 # YOLOv2 Config
 
 yolov2_cfg = {
-    # input
-    'trans_type': 'ssd',
-    'multi_scale': [0.5, 1.5],
-    # model
+    # ---------------- Model config ----------------
+    ## Backbone
     'backbone': 'darknet19',
     'pretrained': True,
     'stride': 32,  # P5
     'max_stride': 32,
-    # neck
+    ## Neck
     'neck': 'sppf',
-    'expand_ratio': 0.5,
-    'pooling_size': 5,
     'neck_act': 'lrelu',
     'neck_norm': 'BN',
     'neck_depthwise': False,
-    # head
+    'expand_ratio': 0.5,
+    'pooling_size': 5,
+    ## Head
     'head': 'decoupled_head',
     'head_act': 'lrelu',
     'head_norm': 'BN',
@@ -27,13 +25,17 @@ yolov2_cfg = {
                     [55,  75],
                     [92,  206],
                     [202, 21],
-                    [289, 311]],  # 416
-    # matcher
+                    [289, 311]],  # 416 scale
+    # ---------------- Data process config ----------------
+    ## Input
+    'multi_scale': [0.5, 1.5], # 320 -> 960
+    'trans_type': 'ssd',
+    # ---------------- Matcher config ----------------
     'iou_thresh': 0.5,
-    # loss weight
+    # ---------------- Loss config ----------------
     'loss_obj_weight': 1.0,
     'loss_cls_weight': 1.0,
     'loss_box_weight': 5.0,
-    # training configuration
-    'trainer_type': 'yolov8',
+    # ---------------- Trainer config ----------------
+    'trainer_type': 'yolo',
 }
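
The anchor_size list is annotated '# 416 scale', i.e. the boxes were tuned for a 416x416 input. If anchors scale linearly with resolution (an assumption; this diff does not show whether the repo rescales them internally), converting them to another training size is simple arithmetic:

from config.model_config.yolov2_config import yolov2_cfg

train_size = 640                            # hypothetical input size
scale = train_size / 416                    # anchors were tuned at 416
anchors = [[round(w * scale), round(h * scale)]
           for w, h in yolov2_cfg['anchor_size']]
print(anchors)                              # e.g. [55, 75] -> [85, 115]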

+ 10 - 11
config/model_config/yolov3_config.py

@@ -12,11 +12,11 @@ yolov3_cfg = {
         'max_stride': 32,
         ## Neck
         'neck': 'sppf',
-        'expand_ratio': 0.5,
-        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
         ## FPN
         'fpn': 'yolov3_fpn',
         'fpn_act': 'silu',
@@ -32,9 +32,8 @@ yolov3_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'trans_type': 'yolov5_l',
+        # ---------------- Data process config ----------------
+        'trans_type': 'yolo_l',
         'multi_scale': [0.5, 1.25],  # 320 -> 800
         # ---------------- Assignment config ----------------
         ## matcher
@@ -45,7 +44,7 @@ yolov3_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov3_tiny':{
@@ -59,11 +58,11 @@ yolov3_cfg = {
         'max_stride': 32,
         ## Neck
         'neck': 'sppf',
-        'expand_ratio': 0.5,
-        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
         ## FPN
         'fpn': 'yolov3_fpn',
         'fpn_act': 'silu',
@@ -79,9 +78,9 @@ yolov3_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
+        # ---------------- Data process config ----------------
         ## input
-        'trans_type': 'yolov5_n',
+        'trans_type': 'yolo_n',
         'multi_scale': [0.5, 1.25],  # 320 -> 800
         # ---------------- Assignment config ----------------
         ## matcher
@@ -92,7 +91,7 @@ yolov3_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
 }

+ 8 - 10
config/model_config/yolov4_config.py

@@ -32,9 +32,8 @@ yolov4_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'trans_type': 'yolov5_l',
+        # ---------------- Data process config ----------------
+        'trans_type': 'yolo_l',
         'multi_scale': [0.5, 1.25],  # 320 -> 800
         # ---------------- Assignment config ----------------
         ## matcher
@@ -45,7 +44,7 @@ yolov4_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov4_tiny':{
@@ -59,11 +58,11 @@ yolov4_cfg = {
         'max_stride': 32,
         ## Neck
         'neck': 'csp_sppf',
-        'expand_ratio': 0.5,
-        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
         ## FPN
         'fpn': 'yolov4_pafpn',
         'fpn_act': 'silu',
@@ -79,9 +78,8 @@ yolov4_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'trans_type': 'yolov5_n',
+        # ---------------- Data process config ----------------
+        'trans_type': 'yolo_n',
         'multi_scale': [0.5, 1.25],  # 320 -> 800
         # ---------------- Assignment config ----------------
         ## matcher
@@ -92,7 +90,7 @@ yolov4_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
 }

+ 15 - 244
config/model_config/yolov5_config.py

@@ -31,10 +31,10 @@ yolov5_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
+        # ---------------- Data process config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_n',
+        'trans_type': 'yolo_n',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -44,7 +44,7 @@ yolov5_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'yolov8',
+        'trainer_type': 'yolo',
     },
 
     'yolov5_s':{
@@ -76,10 +76,10 @@ yolov5_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
+        # ---------------- Data process config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_s',
+        'trans_type': 'yolo_s',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -89,7 +89,7 @@ yolov5_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'yolov8',
+        'trainer_type': 'yolo',
     },
 
     'yolov5_m':{
@@ -121,10 +121,10 @@ yolov5_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
+        # ---------------- Data process config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_m',
+        'trans_type': 'yolo_m',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -134,7 +134,7 @@ yolov5_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'yolov8',
+        'trainer_type': 'yolo',
     },
 
     'yolov5_l':{
@@ -166,10 +166,10 @@ yolov5_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
+        # ---------------- Data process config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_l',
+        'trans_type': 'yolo_l',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -179,7 +179,7 @@ yolov5_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'yolov8',
+        'trainer_type': 'yolo',
     },
 
     'yolov5_x':{
@@ -211,10 +211,10 @@ yolov5_cfg = {
         'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
                         [30, 61],   [62, 45],   [59, 119],    # P4
                         [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
+        # ---------------- Data process config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_x',
+        'trans_type': 'yolo_x',
         # ---------------- Assignment config ----------------
         ## matcher
         'anchor_thresh': 4.0,
@@ -224,235 +224,6 @@ yolov5_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'yolov8',
+        'trainer_type': 'yolo',
     },
 }
-
-
-yolov5_adamw_cfg = {
-    'yolov5_n_adamw':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 0.25,
-        'depth': 0.34,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolov5_pafpn',
-        'fpn_reduce_layer': 'Conv',
-        'fpn_downsample_layer': 'Conv',
-        'fpn_core_block': 'CSPBlock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                        [30, 61],   [62, 45],   [59, 119],    # P4
-                        [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_n',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'anchor_thresh': 4.0,
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
-    'yolov5_s_adamw':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 0.50,
-        'depth': 0.34,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolov5_pafpn',
-        'fpn_reduce_layer': 'Conv',
-        'fpn_downsample_layer': 'Conv',
-        'fpn_core_block': 'CSPBlock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                        [30, 61],   [62, 45],   [59, 119],    # P4
-                        [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_s',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'anchor_thresh': 4.0,
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
-    'yolov5_m_adamw':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 0.75,
-        'depth': 0.67,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolov5_pafpn',
-        'fpn_reduce_layer': 'Conv',
-        'fpn_downsample_layer': 'Conv',
-        'fpn_core_block': 'CSPBlock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                        [30, 61],   [62, 45],   [59, 119],    # P4
-                        [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_m',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'anchor_thresh': 4.0,
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
-    'yolov5_l_adamw':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 1.0,
-        'depth': 1.0,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolov5_pafpn',
-        'fpn_reduce_layer': 'Conv',
-        'fpn_downsample_layer': 'Conv',
-        'fpn_core_block': 'CSPBlock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                        [30, 61],   [62, 45],   [59, 119],    # P4
-                        [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_l',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'anchor_thresh': 4.0,
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
-    'yolov5_x_adamw':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'cspdarknet',
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_dpw': False,
-        'width': 1.25,
-        'depth': 1.34,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## FPN
-        'fpn': 'yolov5_pafpn',
-        'fpn_reduce_layer': 'Conv',
-        'fpn_downsample_layer': 'Conv',
-        'fpn_core_block': 'CSPBlock',
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                        [30, 61],   [62, 45],   [59, 119],    # P4
-                        [116, 90],  [156, 198], [373, 326]],  # P5
-        # ---------------- Train config ----------------
-        ## input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'yolov5_x',
-        # ---------------- Assignment config ----------------
-        ## matcher
-        'anchor_thresh': 4.0,
-        # ---------------- Loss config ----------------
-        ## loss weight
-        'loss_obj_weight': 1.0,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
-    },
-
-}

+ 13 - 16
config/model_config/yolov7_config.py

@@ -5,7 +5,7 @@ yolov7_cfg = {
         # ---------------- Model config ----------------
         ## Backbone
         'backbone': 'elannet_tiny',
-        'pretrained': True,
+        'pretrained': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -13,11 +13,11 @@ yolov7_cfg = {
         'max_stride': 32,
         ## Neck
         'neck': 'csp_sppf',
-        'expand_ratio': 0.5,
-        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
         ## FPN
         'fpn': 'yolov7_pafpn',
         'fpn_act': 'silu',
@@ -33,8 +33,7 @@ yolov7_cfg = {
         'num_cls_head': 2,
         'num_reg_head': 2,
         'head_depthwise': False,
-        # ---------------- Train config ----------------
-        ## input
+        # ---------------- Data process config ----------------
         'trans_type': 'yolox_s',
         'multi_scale': [0.5, 1.5], # 320 -> 960
         # ---------------- Assignment config ----------------
@@ -47,7 +46,7 @@ yolov7_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov7':{
@@ -62,11 +61,11 @@ yolov7_cfg = {
         'max_stride': 32,
         # neck
         'neck': 'csp_sppf',
-        'expand_ratio': 0.5,
-        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
         # fpn
         'fpn': 'yolov7_pafpn',
         'fpn_act': 'silu',
@@ -82,8 +81,7 @@ yolov7_cfg = {
         'num_cls_head': 2,
         'num_reg_head': 2,
         'head_depthwise': False,
-        # ---------------- Train config ----------------
-        ## input
+        # ---------------- Data process config ----------------
         'trans_type': 'yolox_l',
         'multi_scale': [0.5, 1.25], # 320 -> 800
         # ---------------- Assignment config ----------------
@@ -96,7 +94,7 @@ yolov7_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov7_x':{
@@ -111,11 +109,11 @@ yolov7_cfg = {
         'max_stride': 32,
         ## Neck
         'neck': 'csp_sppf',
-        'expand_ratio': 0.5,
-        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
         ## FPN
         'fpn': 'yolov7_pafpn',
         'fpn_act': 'silu',
@@ -131,8 +129,7 @@ yolov7_cfg = {
         'num_cls_head': 2,
         'num_reg_head': 2,
         'head_depthwise': False,
-        # ---------------- Train config ----------------
-        ## input
+        # ---------------- Data process config ----------------
         'trans_type': 'yolox_x',
         'multi_scale': [0.5, 1.5], # 320 -> 960
         # ---------------- Assignment config ----------------
@@ -145,7 +142,7 @@ yolov7_cfg = {
         'loss_cls_weight': 1.0,
         'loss_box_weight': 5.0,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
 }

+ 10 - 10
config/model_config/yolov8_config.py

@@ -37,7 +37,7 @@ yolov8_cfg = {
         # ---------------- Train config ----------------
         ## Input
         'multi_scale': [0.5, 1.5], # 320 -> 960
-        'trans_type': 'yolov5_n',
+        'trans_type': 'yolo_n',
         # ---------------- Assignment config ----------------
         ## Matcher
         'matcher': "tal",
@@ -49,7 +49,7 @@ yolov8_cfg = {
         'loss_box_weight': 7.5,
         'loss_dfl_weight': 1.5,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov8_s':{
@@ -87,7 +87,7 @@ yolov8_cfg = {
         # ---------------- Train config ----------------
         ## Input
         'multi_scale': [0.5, 1.5], # 320 -> 960
-        'trans_type': 'yolov5_s',
+        'trans_type': 'yolo_s',
         # ---------------- Assignment config ----------------
         ## Matcher
         'matcher': "tal",
@@ -99,7 +99,7 @@ yolov8_cfg = {
         'loss_box_weight': 7.5,
         'loss_dfl_weight': 1.5,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov8_m':{
@@ -137,7 +137,7 @@ yolov8_cfg = {
         # ---------------- Train config ----------------
         ## Input
         'multi_scale': [0.5, 1.5], # 320 -> 960
-        'trans_type': 'yolov5_m',
+        'trans_type': 'yolo_m',
         # ---------------- Assignment config ----------------
         ## Matcher
         'matcher': "tal",
@@ -149,7 +149,7 @@ yolov8_cfg = {
         'loss_box_weight': 7.5,
         'loss_dfl_weight': 1.5,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov8_l':{
@@ -187,7 +187,7 @@ yolov8_cfg = {
         # ---------------- Train config ----------------
         ## Input
         'multi_scale': [0.5, 1.5], # 320 -> 960
-        'trans_type': 'yolov5_l',
+        'trans_type': 'yolo_l',
         # ---------------- Assignment config ----------------
         ## Matcher
         'matcher': "tal",
@@ -199,7 +199,7 @@ yolov8_cfg = {
         'loss_box_weight': 7.5,
         'loss_dfl_weight': 1.5,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
     'yolov8_x':{
@@ -237,7 +237,7 @@ yolov8_cfg = {
         # ---------------- Train config ----------------
         ## Input
         'multi_scale': [0.5, 1.5], # 320 -> 960
-        'trans_type': 'yolov5_x',
+        'trans_type': 'yolo_x',
         # ---------------- Assignment config ----------------
         ## Matcher
         'matcher': "tal",
@@ -249,7 +249,7 @@ yolov8_cfg = {
         'loss_box_weight': 7.5,
         'loss_dfl_weight': 1.5,
         # ---------------- Train config ----------------
-        'trainer_type': 'rtcdet',
+        'trainer_type': 'yolo',
     },
 
 }

+ 12 - 28
dataset/build.py

@@ -2,28 +2,25 @@ import os
 
 try:
     # dataset class
-    from .voc import VOCDataset
-    from .coco import COCODataset
+    from .voc        import VOCDataset
+    from .coco       import COCODataset
     from .crowdhuman import CrowdHumanDataset
-    from .widerface import WiderFaceDataset
-    from .customed import CustomedDataset
+    from .widerface  import WiderFaceDataset
+    from .customed   import CustomedDataset
     # transform class
-    from .data_augment.ssd_augment import SSDAugmentation, SSDBaseTransform
+    from .data_augment.ssd_augment    import SSDAugmentation, SSDBaseTransform
     from .data_augment.yolov5_augment import YOLOv5Augmentation, YOLOv5BaseTransform
-    from .data_augment.rtdetr_augment import RTDetrAugmentation, RTDetrBaseTransform
 
 except:
     # dataset class
-    from voc import VOCDataset
-    from coco import COCODataset
+    from voc        import VOCDataset
+    from coco       import COCODataset
     from crowdhuman import CrowdHumanDataset
-    from widerface import WiderFaceDataset
-    from customed import CustomedDataset
+    from widerface  import WiderFaceDataset
+    from customed   import CustomedDataset
     # transform class
-    from data_augment.ssd_augment import SSDAugmentation, SSDBaseTransform
+    from data_augment.ssd_augment    import SSDAugmentation, SSDBaseTransform
     from data_augment.yolov5_augment import YOLOv5Augmentation, YOLOv5BaseTransform
-    from data_augment.rtdetr_augment import RTDetrAugmentation, RTDetrBaseTransform
-
 
 # ------------------------------ Dataset ------------------------------
 def build_dataset(args, data_cfg, trans_config, transform, is_train=False):
@@ -48,7 +45,6 @@ def build_dataset(args, data_cfg, trans_config, transform, is_train=False):
                              transform    = transform,
                              trans_config = trans_config,
                              is_train     = is_train,
-                             load_cache   = args.load_cache
                              )
     ## COCO dataset
     elif args.dataset == 'coco':
@@ -59,7 +55,6 @@ def build_dataset(args, data_cfg, trans_config, transform, is_train=False):
                               transform    = transform,
                               trans_config = trans_config,
                               is_train     = is_train,
-                              load_cache   = args.load_cache
                               )
     ## CrowdHuman dataset
     elif args.dataset == 'crowdhuman':
@@ -89,8 +84,7 @@ def build_dataset(args, data_cfg, trans_config, transform, is_train=False):
                                   image_set    = image_set,
                                   transform    = transform,
                                   trans_config = trans_config,
-                                  is_train      = is_train,
-                                  load_cache    = args.load_cache
+                                  is_train     = is_train,
                                   )
 
     return dataset, dataset_info
@@ -115,20 +109,10 @@ def build_transform(args, trans_config, max_stride=32, is_train=False):
         else:
             transform = SSDBaseTransform(args.img_size)
     ## YOLO style transform
-    elif trans_config['aug_type'] == 'yolov5':
+    elif trans_config['aug_type'] == 'yolo':
         if is_train:
             transform = YOLOv5Augmentation(args.img_size, trans_config['affine_params'], trans_config['use_ablu'])
         else:
             transform = YOLOv5BaseTransform(args.img_size, max_stride)
-    ## RT-DETR style transform
-    elif trans_config['aug_type'] == 'rtdetr':
-        if is_train:
-            transform = RTDetrAugmentation(
-                args.img_size, trans_config['pixel_mean'], trans_config['pixel_std'])
-            if trans_config["mosaic_prob"] > 0:
-                transform.reset_weak_augment()
-        else:
-            transform = RTDetrBaseTransform(
-                args.img_size, trans_config['pixel_mean'], trans_config['pixel_std'])
 
     return transform, trans_config
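
With the RT-DETR branch removed, build_transform dispatches only on the 'ssd' and 'yolo' values of aug_type. A condensed sketch of the remaining logic (the SSD training-branch signature is not shown in this diff and is assumed; the explicit else is a defensive addition, not present in the original):

from dataset.data_augment.ssd_augment    import SSDAugmentation, SSDBaseTransform
from dataset.data_augment.yolov5_augment import YOLOv5Augmentation, YOLOv5BaseTransform

def build_transform_sketch(args, trans_config, max_stride=32, is_train=False):
    if trans_config['aug_type'] == 'ssd':
        transform = (SSDAugmentation(args.img_size) if is_train   # signature assumed
                     else SSDBaseTransform(args.img_size))
    elif trans_config['aug_type'] == 'yolo':
        transform = (YOLOv5Augmentation(args.img_size,
                                        trans_config['affine_params'],
                                        trans_config['use_ablu']) if is_train
                     else YOLOv5BaseTransform(args.img_size, max_stride))
    else:
        raise NotImplementedError(trans_config['aug_type'])   # defensive addition
    return transform, trans_config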

+ 16 - 69
dataset/coco.py

@@ -3,12 +3,9 @@ import cv2
 import time
 import random
 import numpy as np
-from torch.utils.data import Dataset
 
-try:
-    from pycocotools.coco import COCO
-except:
-    print("It seems that the COCOAPI is not installed.")
+from torch.utils.data import Dataset
+from pycocotools.coco import COCO
 
 try:
     from .data_augment.strong_augment import MosaicAugment, MixupAugment
@@ -28,7 +25,6 @@ class COCODataset(Dataset):
                  trans_config = None,
                  transform    = None,
                  is_train     :bool =False,
-                 load_cache   :bool = False,
                  ):
         # ----------- Basic parameters -----------
         self.img_size = img_size
@@ -56,8 +52,8 @@ class COCODataset(Dataset):
         if is_train:
             self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
             self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
-            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
-            self.mixup_augment  = MixupAugment(img_size, trans_config)
+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train) if self.mosaic_prob > 0. else None
+            self.mixup_augment  = MixupAugment(img_size, trans_config)            if self.mixup_prob > 0.  else None
         else:
             self.mosaic_prob = 0.0
             self.mixup_prob  = 0.0
@@ -66,12 +62,6 @@ class COCODataset(Dataset):
         print('==============================')
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
-        print('==============================')
-        # ----------- Cached data -----------
-        self.load_cache = load_cache
-        self.cached_datas = None
-        if self.load_cache:
-            self.cached_datas = self._load_cache()
 
     # ------------ Basic dataset function ------------
     def __len__(self):
@@ -80,38 +70,6 @@ class COCODataset(Dataset):
     def __getitem__(self, index):
         return self.pull_item(index)
 
-    def _load_cache(self):
-        data_items = []
-        for idx in range(self.dataset_size):
-            if idx % 2000 == 0:
-                print("Caching images and targets : {} / {} ...".format(idx, self.dataset_size))
-
-            # load a data
-            image, target = self.load_image_target(idx)
-            orig_h, orig_w, _ = image.shape
-
-            # resize image
-            r = self.img_size / max(orig_h, orig_w)
-            if r != 1: 
-                interp = cv2.INTER_LINEAR
-                new_size = (int(orig_w * r), int(orig_h * r))
-                image = cv2.resize(image, new_size, interpolation=interp)
-            img_h, img_w = image.shape[:2]
-
-            # rescale bbox
-            boxes = target["boxes"].copy()
-            boxes[:, [0, 2]] = boxes[:, [0, 2]] / orig_w * img_w
-            boxes[:, [1, 3]] = boxes[:, [1, 3]] / orig_h * img_h
-            target["boxes"] = boxes
-
-            dict_item = {}
-            dict_item["image"] = image
-            dict_item["target"] = target
-
-            data_items.append(dict_item)
-        
-        return data_items
-
     # ------------ Mosaic & Mixup ------------
     def load_mosaic(self, index):
         # ------------ Prepare 4 indexes of images ------------
@@ -150,25 +108,17 @@ class COCODataset(Dataset):
     
     # ------------ Load data function ------------
     def load_image_target(self, index):
-        # == Load a data from the cached data ==
-        if self.cached_datas is not None:
-            # load a data
-            data_item = self.cached_datas[index]
-            image = data_item["image"]
-            target = data_item["target"]
-        # == Load a data from the local disk ==
-        else:        
-            # load an image
-            image, _ = self.pull_image(index)
-            height, width, channels = image.shape
-
-            # load a target
-            bboxes, labels = self.pull_anno(index)
-            target = {
-                "boxes": bboxes,
-                "labels": labels,
-                "orig_size": [height, width]
-            }
+        # load an image
+        image, _ = self.pull_image(index)
+        height, width, channels = image.shape
+
+        # load a target
+        bboxes, labels = self.pull_anno(index)
+        target = {
+            "boxes": bboxes,
+            "labels": labels,
+            "orig_size": [height, width]
+        }
 
         return image, target
 
@@ -256,7 +206,7 @@ if __name__ == "__main__":
     parser.add_argument('-size', '--img_size', default=640, type=int,
                         help='input image size.')
     parser.add_argument('--aug_type', type=str, default='ssd',
-                        help='augmentation type: ssd, yolov5, rtdetr.')
+                        help='augmentation type: ssd, yolo.')
     parser.add_argument('--mosaic', default=0., type=float,
                         help='mosaic augmentation.')
     parser.add_argument('--mixup', default=0., type=float,
@@ -265,8 +215,6 @@ if __name__ == "__main__":
                         help='mixup augmentation.')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='mixup augmentation.')
-    parser.add_argument('--load_cache', action="store_true", default=False,
-                        help='load cached data.')
     
     args = parser.parse_args()
 
@@ -306,7 +254,6 @@ if __name__ == "__main__":
         trans_config=trans_config,
         transform=transform,
         is_train=args.is_train,
-        load_cache=args.load_cache
         )
     
     np.random.seed(0)
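
The same guard appears here and in CrowdHumanDataset and CustomedDataset below: the Mosaic/Mixup helpers are only constructed when their probability is nonzero, so eval-mode datasets skip that setup entirely. A minimal reproduction of the pattern (constructor internals of the augmenters are assumed):

from dataset.data_augment.strong_augment import MosaicAugment, MixupAugment

class LazyAugInit:
    def __init__(self, img_size, trans_config, is_train):
        if is_train:
            self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
            self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
            # only pay for the augmenters that can actually fire:
            self.mosaic_augment = (MosaicAugment(img_size, trans_config, is_train)
                                   if self.mosaic_prob > 0. else None)
            self.mixup_augment  = (MixupAugment(img_size, trans_config)
                                   if self.mixup_prob > 0. else None)
        else:
            self.mosaic_prob = self.mixup_prob = 0.0
            self.mosaic_augment = self.mixup_augment = None

Callers are then expected to gate on the probability (which implies a non-None augmenter) before invoking either one.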

+ 4 - 7
dataset/crowdhuman.py

@@ -47,8 +47,8 @@ class CrowdHumanDataset(Dataset):
         if is_train:
             self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
             self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
-            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
-            self.mixup_augment  = MixupAugment(img_size, trans_config)
+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train) if self.mosaic_prob > 0. else None
+            self.mixup_augment  = MixupAugment(img_size, trans_config)            if self.mixup_prob > 0.  else None
         else:
             self.mosaic_prob = 0.0
             self.mixup_prob  = 0.0
@@ -57,7 +57,6 @@ class CrowdHumanDataset(Dataset):
         print('==============================')
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
-        print('==============================')
 
     # ------------ Basic dataset function ------------
     def __len__(self):
@@ -200,8 +199,6 @@ if __name__ == "__main__":
                         help='mosaic augmentation.')
     parser.add_argument('--mixup', default=0., type=float,
                         help='mixup augmentation.')
-    parser.add_argument('--mixup_type', type=str, default='yolov5_mixup',
-                        help='mixup augmentation.')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='mixup augmentation.')
 
@@ -224,8 +221,8 @@ if __name__ == "__main__":
         # Mosaic & Mixup
         'mosaic_prob': args.mosaic,
         'mixup_prob': args.mixup,
-        'mosaic_type': 'yolov5_mosaic',
-        'mixup_type': args.mixup_type,   # optional: yolov5_mixup, yolox_mixup
+        'mosaic_type': 'yolov5',
+        'mixup_type':  'yolov5',   # optional: yolov5, yolox
         'mosaic_keep_ratio': False,
         'mixup_scale': [0.5, 1.5]
     }

+ 15 - 66
dataset/customed.py

@@ -25,7 +25,6 @@ class CustomedDataset(Dataset):
                  transform          = None,
                  trans_config       = None,
                  is_train     :bool =False,
-                 load_cache   :bool = False,
                  ):
         # ----------- Basic parameters -----------
         self.img_size = img_size
@@ -46,8 +45,8 @@ class CustomedDataset(Dataset):
         if is_train:
             self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
             self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
-            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
-            self.mixup_augment  = MixupAugment(img_size, trans_config)
+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train) if self.mosaic_prob > 0. else None
+            self.mixup_augment  = MixupAugment(img_size, trans_config)            if self.mixup_prob > 0.  else None
         else:
             self.mosaic_prob = 0.0
             self.mixup_prob  = 0.0
@@ -58,13 +57,6 @@ class CustomedDataset(Dataset):
         print('Json file: {}'.format(self.json_file))
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
-        print('==============================')
-        # ----------- Cached data -----------
-        self.load_cache = load_cache
-        self.cached_datas = None
-        if self.load_cache:
-            self.cached_datas = self._load_cache()
-
 
     # ------------ Basic dataset function ------------
     def __len__(self):
@@ -73,38 +65,6 @@ class CustomedDataset(Dataset):
     def __getitem__(self, index):
         return self.pull_item(index)
 
-    def _load_cache(self):
-        data_items = []
-        for idx in range(self.dataset_size):
-            if idx % 2000 == 0:
-                print("Caching images and targets : {} / {} ...".format(idx, self.dataset_size))
-
-            # load a data
-            image, target = self.load_image_target(idx)
-            orig_h, orig_w, _ = image.shape
-
-            # resize image
-            r = self.img_size / max(orig_h, orig_w)
-            if r != 1: 
-                interp = cv2.INTER_LINEAR
-                new_size = (int(orig_w * r), int(orig_h * r))
-                image = cv2.resize(image, new_size, interpolation=interp)
-            img_h, img_w = image.shape[:2]
-
-            # rescale bbox
-            boxes = target["boxes"].copy()
-            boxes[:, [0, 2]] = boxes[:, [0, 2]] / orig_w * img_w
-            boxes[:, [1, 3]] = boxes[:, [1, 3]] / orig_h * img_h
-            target["boxes"] = boxes
-
-            dict_item = {}
-            dict_item["image"] = image
-            dict_item["target"] = target
-
-            data_items.append(dict_item)
-        
-        return data_items
-
     # ------------ Mosaic & Mixup ------------
     def load_mosaic(self, index):
         # ------------ Prepare 4 indexes of images ------------
@@ -143,25 +103,17 @@ class CustomedDataset(Dataset):
     
     # ------------ Load data function ------------
     def load_image_target(self, index):
-        # == Load a data from the cached data ==
-        if self.cached_datas is not None:
-            # load a data
-            data_item = self.cached_datas[index]
-            image = data_item["image"]
-            target = data_item["target"]
-        # == Load a data from the local disk ==
-        else:        
-            # load an image
-            image, _ = self.pull_image(index)
-            height, width, channels = image.shape
-
-            # load a target
-            bboxes, labels = self.pull_anno(index)
-            target = {
-                "boxes": bboxes,
-                "labels": labels,
-                "orig_size": [height, width]
-            }
+        # load an image
+        image, _ = self.pull_image(index)
+        height, width, channels = image.shape
+
+        # load a target
+        bboxes, labels = self.pull_anno(index)
+        target = {
+            "boxes": bboxes,
+            "labels": labels,
+            "orig_size": [height, width]
+        }
 
         return image, target
 
@@ -257,8 +209,6 @@ if __name__ == "__main__":
                         help='mixup augmentation.')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='mixup augmentation.')
-    parser.add_argument('--load_cache', action="store_true", default=False,
-                        help='load cached data.')
     
     args = parser.parse_args()
 
@@ -279,9 +229,8 @@ if __name__ == "__main__":
         # Mosaic & Mixup
         'mosaic_prob': args.mosaic,
         'mixup_prob': args.mixup,
-        'mosaic_type': 'yolov5_mosaic',
-        'mixup_type': args.mixup_type,   # optional: yolov5_mixup, yolox_mixup
-        'mosaic_keep_ratio': False,
+        'mosaic_type': 'yolov5',
+        'mixup_type':  'yolov5',
         'mixup_scale': [0.5, 1.5]
     }
     transform, trans_cfg = build_transform(args, trans_config, 32, args.is_train)

+ 0 - 471
dataset/data_augment/rtdetr_augment.py

@@ -1,471 +0,0 @@
-# ------------------------------------------------------------
-# Data preprocessor for Real-time DETR
-# ------------------------------------------------------------
-import cv2
-import numpy as np
-from numpy import random
-
-import torch
-import torch.nn.functional as F
-
-
-# ------------------------- Augmentations -------------------------
-class Compose(object):
-    """Composes several augmentations together.
-    Args:
-        transforms (List[Transform]): list of transforms to compose.
-    Example:
-        >>> augmentations.Compose([
-        >>>     transforms.CenterCrop(10),
-        >>>     transforms.ToTensor(),
-        >>> ])
-    """
-
-    def __init__(self, transforms):
-        self.transforms = transforms
-
-    def __call__(self, image, target=None):
-        for t in self.transforms:
-            image, target = t(image, target)
-        return image, target
-
-## Convert color format
-class ConvertColorFormat(object):
-    def __init__(self, color_format='rgb'):
-        self.color_format = color_format
-
-    def __call__(self, image, target=None):
-        """
-        Input:
-            image: (np.array) a OpenCV image with BGR color format.
-            target: None
-        Output:
-            image: (np.array) a OpenCV image with given color format.
-            target: None
-        """
-        # Convert color format
-        if self.color_format == 'rgb':
-            image = image[..., (2, 1, 0)]    # BGR -> RGB
-        elif self.color_format == 'bgr':
-            image = image
-        else:
-            raise NotImplementedError("Unknown color format: <{}>".format(self.color_format))
-
-        return image, target
-
-## Random Photometric Distort
-class RandomPhotometricDistort(object):
-    """
-    Distort image w.r.t hue, saturation and exposure.
-    """
-
-    def __init__(self, hue=0.1, saturation=1.5, exposure=1.5):
-        super().__init__()
-        self.hue = hue
-        self.saturation = saturation
-        self.exposure = exposure
-
-    def __call__(self, image: np.ndarray, target=None) -> np.ndarray:
-        """
-        Args:
-            img (ndarray): of shape HxW, HxWxC, or NxHxWxC. The array can be
-                of type uint8 in range [0, 255], or floating point in range
-                [0, 1] or [0, 255].
-
-        Returns:
-            ndarray: the distorted image(s).
-        """
-        if random.random() < 0.5:
-            dhue = np.random.uniform(low=-self.hue, high=self.hue)
-            dsat = self._rand_scale(self.saturation)
-            dexp = self._rand_scale(self.exposure)
-
-            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
-            image = np.asarray(image, dtype=np.float32) / 255.
-            image[:, :, 1] *= dsat
-            image[:, :, 2] *= dexp
-            H = image[:, :, 0] + dhue * 179 / 255.
-
-            if dhue > 0:
-                H[H > 1.0] -= 1.0
-            else:
-                H[H < 0.0] += 1.0
-
-            image[:, :, 0] = H
-            image = (image * 255).clip(0, 255).astype(np.uint8)
-            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
-            image = np.asarray(image, dtype=np.uint8)
-
-        return image, target
-
-    def _rand_scale(self, upper_bound):
-        """
-        Calculate random scaling factor.
-
-        Args:
-            upper_bound (float): range of the random scale.
-        Returns:
-            random scaling factor (float) whose range is
-            from 1 / s to s .
-        """
-        scale = np.random.uniform(low=1, high=upper_bound)
-        if np.random.rand() > 0.5:
-            return scale
-        return 1 / scale
-
-## Random scaling
-class RandomExpand(object):
-    def __init__(self, fill_value) -> None:
-        self.fill_value = fill_value
-
-    def __call__(self, image, target=None):
-        if random.randint(2):
-            return image, target
-
-        height, width, channels = image.shape
-        ratio = random.uniform(1, 4)
-        left = random.uniform(0, width*ratio - width)
-        top = random.uniform(0, height*ratio - height)
-
-        expand_image = np.ones(
-            (int(height*ratio), int(width*ratio), channels),
-            dtype=image.dtype) * self.fill_value
-        expand_image[int(top):int(top + height),
-                     int(left):int(left + width)] = image
-        image = expand_image
-
-        boxes = target['boxes'].copy()
-        boxes[:, :2] += (int(left), int(top))
-        boxes[:, 2:] += (int(left), int(top))
-        target['boxes'] = boxes
-
-        return image, target
-
-## Random IoU based Sample Crop
-class RandomSampleCrop(object):
-    def __init__(self):
-        self.sample_options = (
-            # using entire original input image
-            None,
-            # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.5,.7,.9
-            (0.1, None),
-            (0.3, None),
-            (0.5, None),
-            (0.7, None),
-            (0.9, None),
-            # randomly sample a patch
-            (None, None),
-        )
-
-    def intersect(self, box_a, box_b):
-        max_xy = np.minimum(box_a[:, 2:], box_b[2:])
-        min_xy = np.maximum(box_a[:, :2], box_b[:2])
-        inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
-
-        return inter[:, 0] * inter[:, 1]
-
-    def compute_iou(self, box_a, box_b):
-        inter = self.intersect(box_a, box_b)
-        area_a = ((box_a[:, 2]-box_a[:, 0]) *
-                (box_a[:, 3]-box_a[:, 1]))  # [A,B]
-        area_b = ((box_b[2]-box_b[0]) *
-                (box_b[3]-box_b[1]))  # [A,B]
-        union = area_a + area_b - inter
-        return inter / union  # [A,B]
-
-    def __call__(self, image, target=None):
-        height, width, _ = image.shape
-
-        # check target
-        if len(target["boxes"]) == 0:
-            return image, target
-
-        while True:
-            # randomly choose a mode
-            sample_id = np.random.randint(len(self.sample_options))
-            mode = self.sample_options[sample_id]
-            if mode is None:
-                return image, target
-
-            boxes = target["boxes"]
-            labels = target["labels"]
-
-            min_iou, max_iou = mode
-            if min_iou is None:
-                min_iou = float('-inf')
-            if max_iou is None:
-                max_iou = float('inf')
-
-            # max trials (50)
-            for _ in range(50):
-                current_image = image
-
-                w = random.uniform(0.3 * width, width)
-                h = random.uniform(0.3 * height, height)
-
-                # aspect ratio constraint b/t .5 & 2
-                if h / w < 0.5 or h / w > 2:
-                    continue
-
-                left = random.uniform(width - w)
-                top = random.uniform(height - h)
-
-                # convert to integer rect x1,y1,x2,y2
-                rect = np.array([int(left), int(top), int(left+w), int(top+h)])
-
-                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
-                overlap = self.compute_iou(boxes, rect)
-
-                # is min and max overlap constraint satisfied? if not try again
-                if overlap.min() < min_iou and max_iou < overlap.max():
-                    continue
-
-                # cut the crop from the image
-                current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
-                                              :]
-
-                # keep overlap with gt box IF center in sampled patch
-                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0
-
-                # mask in all gt boxes that are above and to the left of the centers
-                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
-
-                # mask in all gt boxes that are below and to the right of the centers
-                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
-
-                # mask in that both m1 and m2 are true
-                mask = m1 * m2
-
-                # have any valid boxes? try again if not
-                if not mask.any():
-                    continue
-
-                # take only matching gt boxes
-                current_boxes = boxes[mask, :].copy()
-
-                # take only matching gt labels
-                current_labels = labels[mask]
-
-                # should we use the box left and top corner or the crop's
-                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
-                                                  rect[:2])
-                # adjust to crop (by subtracting crop's left,top)
-                current_boxes[:, :2] -= rect[:2]
-
-                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
-                                                  rect[2:])
-                # adjust to crop (by subtracting crop's left,top)
-                current_boxes[:, 2:] -= rect[:2]
-
-                # update target
-                target["boxes"] = current_boxes
-                target["labels"] = current_labels
-
-                return current_image, target
-
-## Random JitterCrop
-class RandomJitterCrop(object):
-    """Jitter and crop the image and box."""
-    def __init__(self, fill_value, p=0.5, jitter_ratio=0.3):
-        super().__init__()
-        self.p = p
-        self.jitter_ratio = jitter_ratio
-        self.fill_value = fill_value
-
-    def crop(self, image, pleft, pright, ptop, pbot, output_size):
-        oh, ow = image.shape[:2]
-
-        swidth, sheight = output_size
-
-        src_rect = [pleft, ptop, swidth + pleft,
-                    sheight + ptop]  # x1,y1,x2,y2
-        img_rect = [0, 0, ow, oh]
-        # rect intersection
-        new_src_rect = [max(src_rect[0], img_rect[0]),
-                        max(src_rect[1], img_rect[1]),
-                        min(src_rect[2], img_rect[2]),
-                        min(src_rect[3], img_rect[3])]
-        dst_rect = [max(0, -pleft),
-                    max(0, -ptop),
-                    max(0, -pleft) + new_src_rect[2] - new_src_rect[0],
-                    max(0, -ptop) + new_src_rect[3] - new_src_rect[1]]
-
-        # crop the image
-        cropped = np.ones([sheight, swidth, 3], dtype=image.dtype) * self.fill_value
-        # cropped[:, :, ] = np.mean(image, axis=(0, 1))
-        cropped[dst_rect[1]:dst_rect[3], dst_rect[0]:dst_rect[2]] = \
-            image[new_src_rect[1]:new_src_rect[3],
-            new_src_rect[0]:new_src_rect[2]]
-
-        return cropped
-
-    def __call__(self, image, target=None):
-        if random.random() > self.p:
-            return image, target
-        else:
-            oh, ow = image.shape[:2]
-            dw = int(ow * self.jitter_ratio)
-            dh = int(oh * self.jitter_ratio)
-            pleft = np.random.randint(-dw, dw)
-            pright = np.random.randint(-dw, dw)
-            ptop = np.random.randint(-dh, dh)
-            pbot = np.random.randint(-dh, dh)
-
-            swidth = ow - pleft - pright
-            sheight = oh - ptop - pbot
-            output_size = (swidth, sheight)
-            # crop image
-            cropped_image = self.crop(image=image,
-                                    pleft=pleft, 
-                                    pright=pright, 
-                                    ptop=ptop, 
-                                    pbot=pbot,
-                                    output_size=output_size)
-            # crop bbox
-            if target is not None:
-                bboxes = target['boxes'].copy()
-                coords_offset = np.array([pleft, ptop], dtype=np.float32)
-                bboxes[..., [0, 2]] = bboxes[..., [0, 2]] - coords_offset[0]
-                bboxes[..., [1, 3]] = bboxes[..., [1, 3]] - coords_offset[1]
-                swidth, sheight = output_size
-
-                bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], 0, swidth - 1)
-                bboxes[..., [1, 3]] = np.clip(bboxes[..., [1, 3]], 0, sheight - 1)
-                target['boxes'] = bboxes
-
-            return cropped_image, target
-    
-## Random HFlip
-class RandomHorizontalFlip(object):
-    def __init__(self, p=0.5):
-        self.p = p
-
-    def __call__(self, image, target=None):
-        if random.random() < self.p:
-            orig_h, orig_w = image.shape[:2]
-            image = image[:, ::-1]
-            if target is not None:
-                if "boxes" in target:
-                    boxes = target["boxes"].copy()
-                    boxes[..., [0, 2]] = orig_w - boxes[..., [2, 0]]
-                    target["boxes"] = boxes
-
-        return image, target
-
-## Resize tensor image
-class Resize(object):
-    def __init__(self, img_size=640):
-        self.img_size = img_size
-
-    def __call__(self, image, target=None):
-        orig_h, orig_w = image.shape[:2]
-
-        # resize
-        image = cv2.resize(image, (self.img_size, self.img_size)).astype(np.float32)
-        img_h, img_w = image.shape[:2]
-
-        # rescale bboxes
-        if target is not None:
-            boxes = target["boxes"]
-            boxes[:, [0, 2]] = boxes[:, [0, 2]] / orig_w * img_w
-            boxes[:, [1, 3]] = boxes[:, [1, 3]] / orig_h * img_h
-            target["boxes"] = boxes
-
-        return image, target
-
-## Normalize tensor image
-class Normalize(object):
-    def __init__(self, pixel_mean, pixel_std):
-        self.pixel_mean = pixel_mean
-        self.pixel_std = pixel_std
-
-    def __call__(self, image, target=None):
-        # normalize image
-        image = (image - self.pixel_mean) / self.pixel_std
-
-        return image, target
-
-## Convert ndarray to torch.Tensor
-class ToTensor(object):
-    def __call__(self, image, target=None):        
-        # Convert torch.Tensor
-        image = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
-
-        if target is not None:
-            target["boxes"] = torch.as_tensor(target["boxes"]).float()
-            target["labels"] = torch.as_tensor(target["labels"]).long()
-
-        return image, target
-
-
-# ------------------------- Preprocessors -------------------------
-## Transform for Train
-class RTDetrAugmentation(object):
-    def __init__(self, img_size=640, pixel_mean=[123.675, 116.28, 103.53], pixel_std=[58.395, 57.12, 57.375]):
-        # ----------------- Basic parameters -----------------
-        self.img_size = img_size
-        self.pixel_mean = pixel_mean  # RGB format
-        self.pixel_std = pixel_std    # RGB format
-        self.color_format = 'rgb'
-        print("================= Pixel Statistics =================")
-        print("Pixel mean: {}".format(self.pixel_mean))
-        print("Pixel std:  {}".format(self.pixel_std))
-
-        # ----------------- Transforms -----------------
-        self.augment = Compose([
-            RandomPhotometricDistort(hue=0.5, saturation=1.5, exposure=1.5),
-            RandomJitterCrop(p=0.8, jitter_ratio=0.3, fill_value=self.pixel_mean[::-1]),
-            RandomHorizontalFlip(p=0.5),
-            Resize(img_size=self.img_size),
-            ConvertColorFormat(self.color_format),
-            Normalize(self.pixel_mean, self.pixel_std),
-            ToTensor()
-        ])
-
-    def reset_weak_augment(self):
-        print("Reset transform with weak augmentation ...")
-        self.augment = Compose([
-            RandomHorizontalFlip(p=0.5),
-            Resize(img_size=self.img_size),
-            ConvertColorFormat(self.color_format),
-            Normalize(self.pixel_mean, self.pixel_std),
-            ToTensor()
-        ])
-
-
-    def __call__(self, image, target, mosaic=False):
-        orig_h, orig_w = image.shape[:2]
-        ratio = [self.img_size / orig_w, self.img_size / orig_h]
-
-        image, target = self.augment(image, target)
-
-        return image, target, ratio
-
-## Transform for Eval
-class RTDetrBaseTransform(object):
-    def __init__(self, img_size=640, pixel_mean=[123.675, 116.28, 103.53], pixel_std=[58.395, 57.12, 57.375]):
-        # ----------------- Basic parameters -----------------
-        self.img_size = img_size
-        self.pixel_mean = pixel_mean  # RGB format
-        self.pixel_std = pixel_std    # RGB format
-        self.color_format = 'rgb'
-        print("================= Pixel Statistics =================")
-        print("Pixel mean: {}".format(self.pixel_mean))
-        print("Pixel std:  {}".format(self.pixel_std))
-
-        # ----------------- Transforms -----------------
-        self.transform = Compose([
-            Resize(img_size=self.img_size),
-            ConvertColorFormat(self.color_format),
-            Normalize(self.pixel_mean, self.pixel_std),
-            ToTensor()
-        ])
-
-
-    def __call__(self, image, target=None, mosaic=False):
-        orig_h, orig_w = image.shape[:2]
-        ratio = [self.img_size / orig_w, self.img_size / orig_h]
-
-        image, target = self.transform(image, target)
-
-        return image, target, ratio
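
For the record, the deleted preprocessors all shared one contract: each transform maps (image, target) to (image, target), and the top-level wrappers additionally return the per-axis resize ratio. A sketch of the former call pattern with dummy data; the actual call is left commented out since the module no longer exists after this commit:

import numpy as np

# Dummy BGR image and a one-box target standing in for a real sample.
image = np.random.randint(0, 255, size=(480, 640, 3), dtype=np.uint8)
target = {
    "boxes":  np.array([[50., 60., 200., 220.]], dtype=np.float32),
    "labels": np.array([3], dtype=np.int64),
}

# Former usage (module removed by this commit):
# aug = RTDetrAugmentation(img_size=640)
# image_t, target_t, ratio = aug(image, target)
# ratio == [640 / 640, 640 / 480]   # [img_size / orig_w, img_size / orig_h]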

+ 4 - 9
dataset/data_augment/strong_augment.py

@@ -15,7 +15,6 @@ class MosaicAugment(object):
                  ) -> None:
         self.img_size = img_size
         self.is_train = is_train
-        self.keep_ratio    = transform_config['mosaic_keep_ratio']
         self.affine_params = transform_config['affine_params']
         self.mosaic_type   = transform_config['mosaic_type']
 
@@ -37,14 +36,10 @@ class MosaicAugment(object):
             orig_h, orig_w, _ = img_i.shape
 
             # resize
-            if self.keep_ratio:
-                r = self.img_size / max(orig_h, orig_w)
-                if r != 1: 
-                    interp = cv2.INTER_LINEAR if (self.is_train or r > 1) else cv2.INTER_AREA
-                    img_i = cv2.resize(img_i, (int(orig_w * r), int(orig_h * r)), interpolation=interp)
-            else:
-                interp = cv2.INTER_LINEAR if self.is_train else cv2.INTER_AREA
-                img_i = cv2.resize(img_i, (self.img_size, self.img_size), interpolation=interp)
+            r = self.img_size / max(orig_h, orig_w)
+            if r != 1: 
+                interp = cv2.INTER_LINEAR if (self.is_train or r > 1) else cv2.INTER_AREA
+                img_i = cv2.resize(img_i, (int(orig_w * r), int(orig_h * r)), interpolation=interp)
             h, w, _ = img_i.shape
 
             # place img in img4
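
The simplification above keeps only the aspect-preserving branch: each mosaic tile is scaled so its longer side equals img_size, and the shorter side follows. A quick numeric check of that rule (sizes only, no cv2 needed; returns (new_h, new_w)):

img_size = 640

def mosaic_tile_shape(orig_h, orig_w):
    # scale so max(h, w) == img_size while preserving aspect ratio
    r = img_size / max(orig_h, orig_w)
    return int(orig_h * r), int(orig_w * r)

print(mosaic_tile_shape(480, 320))   # (640, 426): long side pinned to 640
print(mosaic_tile_shape(640, 640))   # (640, 640): r == 1, resize is skipped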

+ 18 - 69
dataset/voc.py

@@ -70,7 +70,6 @@ class VOCDataset(data.Dataset):
                  trans_config = None,
                  transform    = None,
                  is_train     :bool = False,
-                 load_cache   :bool = False,
                  ):
         # ----------- Basic parameters -----------
         self.img_size = img_size
@@ -95,8 +94,8 @@ class VOCDataset(data.Dataset):
         if is_train:
             self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
             self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
-            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
-            self.mixup_augment  = MixupAugment(img_size, trans_config)
+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train) if self.mosaic_prob > 0. else None
+            self.mixup_augment  = MixupAugment(img_size, trans_config)            if self.mixup_prob > 0.  else None
         else:
             self.mosaic_prob = 0.0
             self.mixup_prob  = 0.0
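
Building MosaicAugment/MixupAugment only when their probability is non-zero means a disabled augmentation costs nothing at setup time; the sampling side can then guard on the same probability. A hedged sketch of that gating (the random.random() < prob check is an assumption about how the probability is consumed, not a quote from the dataset code):

import random

mosaic_prob = 0.0                                    # mosaic disabled
mosaic_augment = object() if mosaic_prob > 0. else None

def roll_mosaic():
    # never dereference an augment that was not built
    return mosaic_augment is not None and random.random() < mosaic_prob

print(roll_mosaic())   # always False while mosaic_prob == 0.0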
@@ -105,13 +104,6 @@ class VOCDataset(data.Dataset):
         print('==============================')
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
-        print('==============================')
-        # ----------- Cached data -----------
-        self.load_cache = load_cache
-        self.cached_datas = None
-        if self.load_cache:
-            self.cached_datas = self._load_cache()
-
 
     # ------------ Basic dataset function ------------
     def __getitem__(self, index):
@@ -121,38 +113,6 @@ class VOCDataset(data.Dataset):
     def __len__(self):
         return self.dataset_size
 
-    def _load_cache(self):
-        data_items = []
-        for idx in range(self.dataset_size):
-            if idx % 2000 == 0:
-                print("Caching images and targets : {} / {} ...".format(idx, self.dataset_size))
-
-            # load a data
-            image, target = self.load_image_target(idx)
-            orig_h, orig_w, _ = image.shape
-
-            # resize image
-            r = self.img_size / max(orig_h, orig_w)
-            if r != 1: 
-                interp = cv2.INTER_LINEAR
-                new_size = (int(orig_w * r), int(orig_h * r))
-                image = cv2.resize(image, new_size, interpolation=interp)
-            img_h, img_w = image.shape[:2]
-
-            # rescale bbox
-            boxes = target["boxes"].copy()
-            boxes[:, [0, 2]] = boxes[:, [0, 2]] / orig_w * img_w
-            boxes[:, [1, 3]] = boxes[:, [1, 3]] / orig_h * img_h
-            target["boxes"] = boxes
-
-            dict_item = {}
-            dict_item["image"] = image
-            dict_item["target"] = target
-
-            data_items.append(dict_item)
-        
-        return data_items
-
     # ------------ Mosaic & Mixup ------------
     def load_mosaic(self, index):
         # ------------ Prepare 4 indexes of images ------------
@@ -191,28 +151,20 @@ class VOCDataset(data.Dataset):
     
     # ------------ Load data function ------------
     def load_image_target(self, index):
-        # == Load a data from the cached data ==
-        if self.cached_datas is not None:
-            # load a data
-            data_item = self.cached_datas[index]
-            image = data_item["image"]
-            target = data_item["target"]
-        # == Load a data from the local disk ==
-        else:        
-            # load an image
-            image, _ = self.pull_image(index)
-            height, width, channels = image.shape
-
-            # load an annotation
-            anno, _ = self.pull_anno(index)
-
-            # guard against images with no boxes (reshape keeps shape (-1, 5))
-            anno = np.array(anno).reshape(-1, 5)
-            target = {
-                "boxes": anno[:, :4],
-                "labels": anno[:, 4],
-                "orig_size": [height, width]
-            }
+        # load an image
+        image, _ = self.pull_image(index)
+        height, width, channels = image.shape
+
+        # load an annotation
+        anno, _ = self.pull_anno(index)
+
+        # guard against images with no boxes (reshape keeps shape (-1, 5))
+        anno = np.array(anno).reshape(-1, 5)
+        target = {
+            "boxes": anno[:, :4],
+            "labels": anno[:, 4],
+            "orig_size": [height, width]
+        }
         
         return image, target
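
The reshape(-1, 5) is what keeps the no-annotation case safe: an image without boxes still produces an array with a fixed second dimension, so the slices above never raise. A short demonstration:

import numpy as np

anno_empty = np.array([]).reshape(-1, 5)                       # image with no objects
anno_one   = np.array([10., 20., 100., 120., 7.]).reshape(-1, 5)

print(anno_empty[:, :4].shape)   # (0, 4): valid slice, no IndexError
print(anno_one[:, 4])            # [7.]: class-id column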
 
@@ -262,7 +214,7 @@ if __name__ == "__main__":
     parser.add_argument('-size', '--img_size', default=640, type=int,
                         help='input image size.')
     parser.add_argument('--aug_type', type=str, default='ssd',
-                        help='augmentation type: ssd, yolov5, rtdetr.')
+                        help='augmentation type: ssd, yolo.')
     parser.add_argument('--mosaic', default=0., type=float,
                         help='mosaic augmentation.')
     parser.add_argument('--mixup', default=0., type=float,
@@ -271,8 +223,6 @@ if __name__ == "__main__":
                         help='mixup augmentation.')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='training mode.')
-    parser.add_argument('--load_cache', action="store_true", default=False,
-                        help='Path to the cached data.')
     
     args = parser.parse_args()
 
@@ -295,7 +245,7 @@ if __name__ == "__main__":
         # Mosaic & Mixup
         'mosaic_keep_ratio': False,
         'mosaic_prob': args.mosaic,
-        'mixup_prob': args.mixup,
+        'mixup_prob':  args.mixup,
         'mosaic_type': 'yolov5',
         'mixup_type':  'yolov5',
         'mixup_scale': [0.5, 1.5]
@@ -312,7 +262,6 @@ if __name__ == "__main__":
         trans_config=trans_config,
         transform=transform,
         is_train=args.is_train,
-        load_cache=args.load_cache
         )
     
     np.random.seed(0)

+ 5 - 9
dataset/widerface.py

@@ -51,8 +51,8 @@ class WiderFaceDataset(Dataset):
         if is_train:
             self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
             self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
-            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
-            self.mixup_augment  = MixupAugment(img_size, trans_config)
+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train) if self.mosaic_prob > 0. else None
+            self.mixup_augment  = MixupAugment(img_size, trans_config)            if self.mixup_prob > 0.  else None
         else:
             self.mosaic_prob = 0.0
             self.mixup_prob  = 0.0
@@ -61,7 +61,6 @@ class WiderFaceDataset(Dataset):
         print('==============================')
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
-        print('==============================')
 
     # ------------ Basic dataset function ------------
     def __len__(self):
@@ -203,15 +202,13 @@ if __name__ == "__main__":
                         help='mosaic augmentation.')
     parser.add_argument('--mixup', default=0., type=float,
                         help='mixup augmentation.')
-    parser.add_argument('--mixup_type', type=str, default='yolov5_mixup',
-                        help='mixup augmentation.')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='training mode.')
 
     args = parser.parse_args()
 
     trans_config = {
-        'aug_type': args.aug_type,    # optional: ssd, yolov5
+        'aug_type': args.aug_type,    # optional: ssd, yolo
         'pixel_mean': [0., 0., 0.],
         'pixel_std':  [255., 255., 255.],
         # Basic Augment
@@ -227,9 +224,8 @@ if __name__ == "__main__":
         # Mosaic & Mixup
         'mosaic_prob': args.mosaic,
         'mixup_prob': args.mixup,
-        'mosaic_type': 'yolov5_mosaic',
-        'mixup_type': args.mixup_type,   # optional: yolov5_mixup, yolox_mixup
-        'mosaic_keep_ratio': False,
+        'mosaic_type': 'yolov5',
+        'mixup_type':  'yolov5',   # optional: yolov5, yolox
         'mixup_scale': [0.5, 1.5]
     }
     transform, trans_cfg = build_transform(args, trans_config, 32, args.is_train)

+ 134 - 865
engine.py

@@ -16,17 +16,16 @@ from utils.vis_tools import vis_data
 from evaluator.build import build_evluator
 
 # ----------------- Optimizer & LrScheduler Components -----------------
-from utils.solver.optimizer import build_yolo_optimizer, build_rtdetr_optimizer
+from utils.solver.optimizer import build_optimizer
 from utils.solver.lr_scheduler import build_lambda_lr_scheduler
-from utils.solver.lr_scheduler import build_wp_lr_scheduler, build_lr_scheduler
 
 # ----------------- Dataset Components -----------------
 from dataset.build import build_dataset, build_transform
 
 
 # ----------------------- Det trainers -----------------------
-## YOLOv8 Trainer
-class Yolov8Trainer(object):
+## YOLOX Trainer
+class YoloxTrainer(object):
     def __init__(self, args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size):
         # ------------------- basic parameters -------------------
         self.args = args
@@ -35,9 +34,9 @@ class Yolov8Trainer(object):
         self.device = device
         self.criterion = criterion
         self.world_size = world_size
+        self.grad_accumulate = args.grad_accumulate
+        self.no_aug_epoch = args.no_aug_epoch
         self.heavy_eval = False
-        self.last_opt_step = 0
-        self.clip_grad = 10
         # weak augmentation stage
         self.second_stage = False
         self.third_stage = False
@@ -47,10 +46,10 @@ class Yolov8Trainer(object):
         self.path_to_save = os.path.join(args.save_folder, args.dataset, args.model)
         os.makedirs(self.path_to_save, exist_ok=True)
 
-        # ---------------------------- Hyperparameters following YOLOv8 ----------------------------
-        self.optimizer_dict = {'optimizer': 'sgd', 'momentum': 0.937, 'weight_decay': 5e-4, 'lr0': 0.01}
+        # ---------------------------- Hyperparameters following YOLOX ----------------------------
+        self.optimizer_dict = {'optimizer': 'sgd', 'momentum': 0.9, 'weight_decay': 5e-4, 'lr0': 0.01}
         self.ema_dict = {'ema_decay': 0.9999, 'ema_tau': 2000}
-        self.lr_schedule_dict = {'scheduler': 'linear', 'lrf': 0.01}
+        self.lr_schedule_dict = {'scheduler': 'cosine', 'lrf': 0.05}
         self.warmup_dict = {'warmup_momentum': 0.8, 'warmup_bias_lr': 0.1}        
 
         # ---------------------------- Build Dataset & Model & Trans. Config ----------------------------
@@ -60,9 +59,9 @@ class Yolov8Trainer(object):
 
         # ---------------------------- Build Transform ----------------------------
         self.train_transform, self.trans_cfg = build_transform(
-            args=args, trans_config=self.trans_cfg, max_stride=model_cfg['max_stride'], is_train=True)
+            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
         self.val_transform, _ = build_transform(
-            args=args, trans_config=self.trans_cfg, max_stride=model_cfg['max_stride'], is_train=False)
+            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=False)
 
         # ---------------------------- Build Dataset & Dataloader ----------------------------
         self.dataset, self.dataset_info = build_dataset(self.args, self.data_cfg, self.trans_cfg, self.train_transform, is_train=True)
@@ -75,13 +74,11 @@ class Yolov8Trainer(object):
         self.scaler = torch.cuda.amp.GradScaler(enabled=self.args.fp16)
 
         # ---------------------------- Build Optimizer ----------------------------
-        accumulate = max(1, round(64 / self.args.batch_size))
-        print('Grad Accumulate: {}'.format(accumulate))
-        self.optimizer_dict['weight_decay'] *= self.args.batch_size * accumulate / 64
-        self.optimizer, self.start_epoch = build_yolo_optimizer(self.optimizer_dict, model, self.args.resume)
+        self.optimizer_dict['lr0'] *= self.args.batch_size * self.grad_accumulate / 64
+        self.optimizer, self.start_epoch = build_optimizer(self.optimizer_dict, model, self.args.resume)
 
         # ---------------------------- Build LR Scheduler ----------------------------
-        self.lr_scheduler, self.lf = build_lambda_lr_scheduler(self.lr_schedule_dict, self.optimizer, self.args.max_epoch)
+        self.lr_scheduler, self.lf = build_lambda_lr_scheduler(self.lr_schedule_dict, self.optimizer, self.args.max_epoch - self.no_aug_epoch)
         self.lr_scheduler.last_epoch = self.start_epoch - 1  # do not move
         if self.args.resume and self.args.resume != 'None':
             self.lr_scheduler.step()
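
Scaling lr0 instead of the weight decay follows the linear-scaling rule: lr0 is defined for an effective batch of 64, and batch_size * grad_accumulate plays the role of that effective batch. Worked values:

base_lr0 = 0.01   # defined for an effective batch size of 64

def scaled_lr0(batch_size, grad_accumulate):
    return base_lr0 * batch_size * grad_accumulate / 64

print(scaled_lr0(16, 4))   # 0.01:    16 x 4 == 64, unchanged
print(scaled_lr0(8, 1))    # 0.00125: small effective batch, lr reduced 8x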
@@ -93,6 +90,7 @@ class Yolov8Trainer(object):
         else:
             self.model_ema = None
 
+
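
ema_decay and ema_tau parameterize the weight EMA. A sketch of the usual YOLOv5-style decay ramp these two values suggest; the exact formula inside ModelEMA is an assumption here, not quoted from this repo:

import math

ema_decay, ema_tau = 0.9999, 2000

def effective_decay(num_updates):
    # assumed ramp: decay rises from ~0 toward ema_decay, so early noisy
    # weights are averaged in aggressively and late weights barely move the EMA
    return ema_decay * (1.0 - math.exp(-num_updates / ema_tau))

for n in (100, 2000, 20000):
    print(n, round(effective_decay(n), 5))   # ~0.04877, ~0.63206, ~0.99985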
     def train(self, model):
         for epoch in range(self.start_epoch, self.args.max_epoch):
             if self.args.distributed:
@@ -125,7 +123,7 @@ class Yolov8Trainer(object):
                             'epoch': self.epoch,
                             'args': self.args}, 
                             checkpoint_path)
-
+                
             # train one epoch
             self.epoch = epoch
             self.train_one_epoch(model)
@@ -200,15 +198,13 @@ class Yolov8Trainer(object):
         img_size = self.args.img_size
         t0 = time.time()
         nw = epoch_size * self.args.wp_epoch
-        accumulate = accumulate = max(1, round(64 / self.args.batch_size))
 
-        # train one epoch
+        # Train one epoch
         for iter_i, (images, targets) in enumerate(self.train_loader):
             ni = iter_i + self.epoch * epoch_size
             # Warmup
             if ni <= nw:
                 xi = [0, nw]  # x interp
-                accumulate = max(1, np.interp(ni, xi, [1, 64 / self.args.batch_size]).round())
                 for j, x in enumerate(self.optimizer.param_groups):
                     # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                     x['lr'] = np.interp(
@@ -216,54 +212,45 @@ class Yolov8Trainer(object):
                     if 'momentum' in x:
                         x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
                                 
-            # to device
+            # To device
             images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
-            if self.args.multi_scale:
+            if self.args.multi_scale and ni % 10 == 0:
                 images, targets, img_size = self.rescale_image_targets(
                     images, targets, self.model_cfg['stride'], self.args.min_box_size, self.model_cfg['multi_scale'])
             else:
                 targets = self.refine_targets(targets, self.args.min_box_size)
                 
-            # visualize train targets
+            # Visualize train targets
             if self.args.vis_tgt:
                 vis_data(images*255, targets)
 
-            # inference
+            # Inference
             with torch.cuda.amp.autocast(enabled=self.args.fp16):
                 outputs = model(images)
-                # loss
+                # Compute loss
                 loss_dict = self.criterion(outputs=outputs, targets=targets, epoch=self.epoch)
                 losses = loss_dict['losses']
-                losses *= images.shape[0]  # loss * bs
+                # Grad Accu
+                if self.grad_accumulate > 1: 
+                    losses /= self.grad_accumulate
 
-                # reduce            
                 loss_dict_reduced = distributed_utils.reduce_dict(loss_dict)
 
-                # gradient averaged between devices in DDP mode
-                losses *= distributed_utils.get_world_size()
-
-            # backward
+            # Backward
             self.scaler.scale(losses).backward()
 
             # Optimize
-            if ni - self.last_opt_step >= accumulate:
-                if self.clip_grad > 0:
-                    # unscale gradients
-                    self.scaler.unscale_(self.optimizer)
-                    # clip gradients
-                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=self.clip_grad)
-                # optimizer.step
+            if ni % self.grad_accumulate == 0:
                 self.scaler.step(self.optimizer)
                 self.scaler.update()
                 self.optimizer.zero_grad()
                 # ema
                 if self.model_ema is not None:
                     self.model_ema.update(model)
-                self.last_opt_step = ni
 
-            # display
+            # Logs
             if distributed_utils.is_main_process() and iter_i % 10 == 0:
                 t1 = time.time()
                 cur_lr = [param_group['lr']  for param_group in self.optimizer.param_groups]
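
Two pieces of arithmetic in this loop are easy to misread. During warmup, np.interp moves each param group's lr from its warmup start toward the scheduled value: the bias group starts high at 0.1 and falls, the others rise from 0. Under accumulation, the loss is pre-divided so the gradients summed over grad_accumulate backward passes match one large-batch step. A condensed, self-contained sketch of both, with the scheduled lr simplified to a constant lr0 and illustrative sizes:

import numpy as np

nw = 3 * 500                        # warmup iters = wp_epoch * epoch_size (illustrative)
warmup_bias_lr, lr0 = 0.1, 0.01
grad_accumulate = 4

for ni in (0, nw // 2, nw):
    bias_lr  = float(np.interp(ni, [0, nw], [warmup_bias_lr, lr0]))  # falls 0.1 -> 0.01
    other_lr = float(np.interp(ni, [0, nw], [0.0, lr0]))             # rises 0.0 -> 0.01
    print(ni, round(bias_lr, 4), round(other_lr, 4))

loss = 2.0 / grad_accumulate        # pre-divide: 4 accumulated backwards == 1 step
print(loss)                         # 0.5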
@@ -273,7 +260,10 @@ class Yolov8Trainer(object):
                 log += '[lr: {:.6f}]'.format(cur_lr[2])
                 # loss infor
                 for k in loss_dict_reduced.keys():
-                    log += '[{}: {:.2f}]'.format(k, loss_dict_reduced[k])
+                    loss_val = loss_dict_reduced[k]
+                    if k == 'losses':
+                        loss_val *= self.grad_accumulate
+                    log += '[{}: {:.2f}]'.format(k, loss_val)
 
                 # other infor
                 log += '[time: {:.2f}]'.format(t1 - t0)
@@ -283,12 +273,14 @@ class Yolov8Trainer(object):
                 print(log, flush=True)
                 
                 t0 = time.time()
-        
+
             if self.args.debug:
                 print("For debug mode, we only train 1 iteration")
                 break
 
-        self.lr_scheduler.step()
+        # LR Schedule
+        if not self.second_stage:
+            self.lr_scheduler.step()
         
     def check_second_stage(self):
         # set second stage
@@ -369,8 +361,13 @@ class Yolov8Trainer(object):
 
         # During training phase, the shape of input image is square.
         old_img_size = images.shape[-1]
-        new_img_size = random.randrange(old_img_size * multi_scale_range[0], old_img_size * multi_scale_range[1] + max_stride)
+        min_img_size = old_img_size * multi_scale_range[0]
+        max_img_size = old_img_size * multi_scale_range[1]
+
+        # Choose a new image size
+        new_img_size = random.randrange(min_img_size, max_img_size + max_stride, max_stride)
         new_img_size = new_img_size // max_stride * max_stride  # size
+        
         if new_img_size / old_img_size != 1:
             # interpolate
             images = torch.nn.functional.interpolate(
@@ -396,8 +393,8 @@ class Yolov8Trainer(object):
 
         return images, targets, new_img_size
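
With the step argument, randrange now samples only multiples of max_stride inside the scale window, and the floor-divide is just a final guard. Candidate sizes for the typical setup (the [0.5, 1.5] range is an assumed model-config value; int() casts are added here because randrange requires integers):

import random

old_img_size, max_stride = 640, 32
multi_scale_range = [0.5, 1.5]      # assumed value from the model config

min_img_size = int(old_img_size * multi_scale_range[0])   # 320
max_img_size = int(old_img_size * multi_scale_range[1])   # 960

new_img_size = random.randrange(min_img_size, max_img_size + max_stride, max_stride)
new_img_size = new_img_size // max_stride * max_stride    # guard: keep a stride multiple
print(new_img_size)    # one of 320, 352, ..., 960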
 
-## YOLOX Trainer
-class YoloxTrainer(object):
+## Real-time Convolutional Object Detector Trainer
+class RTCTrainer(object):
     def __init__(self, args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size):
         # ------------------- basic parameters -------------------
         self.args = args
@@ -407,7 +404,7 @@ class YoloxTrainer(object):
         self.criterion = criterion
         self.world_size = world_size
         self.grad_accumulate = args.grad_accumulate
-        self.no_aug_epoch = args.no_aug_epoch
+        self.clip_grad = 35
         self.heavy_eval = False
         # weak augmentation stage
         self.second_stage = False
@@ -418,39 +415,39 @@ class YoloxTrainer(object):
         self.path_to_save = os.path.join(args.save_folder, args.dataset, args.model)
         os.makedirs(self.path_to_save, exist_ok=True)
 
-        # ---------------------------- Hyperparameters following YOLOX ----------------------------
-        self.optimizer_dict = {'optimizer': 'sgd', 'momentum': 0.9, 'weight_decay': 5e-4, 'lr0': 0.01}
-        self.ema_dict = {'ema_decay': 0.9999, 'ema_tau': 2000}
-        self.lr_schedule_dict = {'scheduler': 'cosine', 'lrf': 0.05}
+        # ---------------------------- Hyperparameters following RTMDet ----------------------------
+        self.optimizer_dict = {'optimizer': 'adamw', 'momentum': None, 'weight_decay': 5e-2, 'lr0': 0.001}
+        self.ema_dict = {'ema_decay': 0.9998, 'ema_tau': 2000}
+        self.lr_schedule_dict = {'scheduler': 'linear', 'lrf': 0.01}
         self.warmup_dict = {'warmup_momentum': 0.8, 'warmup_bias_lr': 0.1}        
 
         # ---------------------------- Build Dataset & Model & Trans. Config ----------------------------
-        self.data_cfg = data_cfg
+        self.data_cfg  = data_cfg
         self.model_cfg = model_cfg
         self.trans_cfg = trans_cfg
 
         # ---------------------------- Build Transform ----------------------------
         self.train_transform, self.trans_cfg = build_transform(
-            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
+            args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
         self.val_transform, _ = build_transform(
-            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=False)
+            args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=False)
 
         # ---------------------------- Build Dataset & Dataloader ----------------------------
-        self.dataset, self.dataset_info = build_dataset(self.args, self.data_cfg, self.trans_cfg, self.train_transform, is_train=True)
-        self.train_loader = build_dataloader(self.args, self.dataset, self.args.batch_size // self.world_size, CollateFunc())
+        self.dataset, self.dataset_info = build_dataset(args, self.data_cfg, self.trans_cfg, self.train_transform, is_train=True)
+        self.train_loader = build_dataloader(args, self.dataset, self.args.batch_size // self.world_size, CollateFunc())
 
         # ---------------------------- Build Evaluator ----------------------------
-        self.evaluator = build_evluator(self.args, self.data_cfg, self.val_transform, self.device)
+        self.evaluator = build_evluator(args, self.data_cfg, self.val_transform, self.device)
 
         # ---------------------------- Build Grad. Scaler ----------------------------
         self.scaler = torch.cuda.amp.GradScaler(enabled=self.args.fp16)
 
         # ---------------------------- Build Optimizer ----------------------------
-        self.optimizer_dict['lr0'] *= self.args.batch_size * self.grad_accumulate / 64
-        self.optimizer, self.start_epoch = build_yolo_optimizer(self.optimizer_dict, model, self.args.resume)
+        self.optimizer_dict['lr0'] *= args.batch_size * self.grad_accumulate / 64
+        self.optimizer, self.start_epoch = build_optimizer(self.optimizer_dict, model, args.resume)
 
         # ---------------------------- Build LR Scheduler ----------------------------
-        self.lr_scheduler, self.lf = build_lambda_lr_scheduler(self.lr_schedule_dict, self.optimizer, self.args.max_epoch - self.no_aug_epoch)
+        self.lr_scheduler, self.lf = build_lambda_lr_scheduler(self.lr_schedule_dict, self.optimizer, args.max_epoch)
         self.lr_scheduler.last_epoch = self.start_epoch - 1  # do not move
         if self.args.resume and self.args.resume != 'None':
             self.lr_scheduler.step()
@@ -462,7 +459,6 @@ class YoloxTrainer(object):
         else:
             self.model_ema = None
 
-
     def train(self, model):
         for epoch in range(self.start_epoch, self.args.max_epoch):
             if self.args.distributed:
@@ -495,7 +491,7 @@ class YoloxTrainer(object):
                             'epoch': self.epoch,
                             'args': self.args}, 
                             checkpoint_path)
-                
+
             # train one epoch
             self.epoch = epoch
             self.train_one_epoch(model)
@@ -565,14 +561,21 @@ class YoloxTrainer(object):
             dist.barrier()
 
     def train_one_epoch(self, model):
+        metric_logger = MetricLogger(delimiter="  ")
+        metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
+        metric_logger.add_meter('size', SmoothedValue(window_size=1, fmt='{value:d}'))
+        metric_logger.add_meter('grad_norm', SmoothedValue(window_size=1, fmt='{value:.1f}'))
+        header = 'Epoch: [{} / {}]'.format(self.epoch, self.args.max_epoch)
+        epoch_size = len(self.train_loader)
+        print_freq = 10
+
         # basic parameters
         epoch_size = len(self.train_loader)
         img_size = self.args.img_size
-        t0 = time.time()
         nw = epoch_size * self.args.wp_epoch
 
         # Train one epoch
-        for iter_i, (images, targets) in enumerate(self.train_loader):
+        for iter_i, (images, targets) in enumerate(metric_logger.log_every(self.train_loader, print_freq, header)):
             ni = iter_i + self.epoch * epoch_size
             # Warmup
             if ni <= nw:
@@ -588,7 +591,7 @@ class YoloxTrainer(object):
             images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
-            if self.args.multi_scale and ni % 10 == 0:
+            if self.args.multi_scale:
                 images, targets, img_size = self.rescale_image_targets(
                     images, targets, self.model_cfg['stride'], self.args.min_box_size, self.model_cfg['multi_scale'])
             else:
@@ -604,8 +607,8 @@ class YoloxTrainer(object):
                 # Compute loss
                 loss_dict = self.criterion(outputs=outputs, targets=targets, epoch=self.epoch)
                 losses = loss_dict['losses']
-                # Grad Accu
-                if self.grad_accumulate > 1: 
+                # Grad Accumulate
+                if self.grad_accumulate > 1:
                     losses /= self.grad_accumulate
 
                 loss_dict_reduced = distributed_utils.reduce_dict(loss_dict)
@@ -615,6 +618,13 @@ class YoloxTrainer(object):
 
             # Optimize
             if ni % self.grad_accumulate == 0:
+                grad_norm = None
+                if self.clip_grad > 0:
+                    # unscale gradients
+                    self.scaler.unscale_(self.optimizer)
+                    # clip gradients
+                    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=self.clip_grad)
+                # optimizer.step
                 self.scaler.step(self.optimizer)
                 self.scaler.update()
                 self.optimizer.zero_grad()
@@ -622,29 +632,11 @@ class YoloxTrainer(object):
                 if self.model_ema is not None:
                     self.model_ema.update(model)
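
The ordering in this optimize step matters: gradients must be unscaled before clip_grad_norm_, otherwise the norm is measured on loss-scaled gradients and the threshold of 35 is meaningless. A minimal standalone sketch of the same sequence (enabled=False so it also runs on CPU):

import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scaler = torch.cuda.amp.GradScaler(enabled=False)

loss = model(torch.randn(8, 4)).sum()
scaler.scale(loss).backward()

scaler.unscale_(optimizer)                       # 1) remove the loss scale
grad_norm = torch.nn.utils.clip_grad_norm_(      # 2) clip true gradients
    model.parameters(), max_norm=35)
scaler.step(optimizer)                           # 3) step (skipped on inf/nan)
scaler.update()
optimizer.zero_grad()
print(float(grad_norm))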
 
-            # Logs
-            if distributed_utils.is_main_process() and iter_i % 10 == 0:
-                t1 = time.time()
-                cur_lr = [param_group['lr']  for param_group in self.optimizer.param_groups]
-                # basic infor
-                log =  '[Epoch: {}/{}]'.format(self.epoch, self.args.max_epoch)
-                log += '[Iter: {}/{}]'.format(iter_i, epoch_size)
-                log += '[lr: {:.6f}]'.format(cur_lr[2])
-                # loss infor
-                for k in loss_dict_reduced.keys():
-                    loss_val = loss_dict_reduced[k]
-                    if k == 'losses':
-                        loss_val *= self.grad_accumulate
-                    log += '[{}: {:.2f}]'.format(k, loss_val)
-
-                # other infor
-                log += '[time: {:.2f}]'.format(t1 - t0)
-                log += '[size: {}]'.format(img_size)
-
-                # print log infor
-                print(log, flush=True)
-                
-                t0 = time.time()
+            # Update log
+            metric_logger.update(**loss_dict_reduced)
+            metric_logger.update(lr=self.optimizer.param_groups[2]["lr"])
+            metric_logger.update(grad_norm=grad_norm)
+            metric_logger.update(size=img_size)
 
             if self.args.debug:
                 print("For debug mode, we only train 1 iteration")
@@ -653,60 +645,11 @@ class YoloxTrainer(object):
         # LR Schedule
         if not self.second_stage:
             self.lr_scheduler.step()
-        
-    def check_second_stage(self):
-        # set second stage
-        print('============== Second stage of Training ==============')
-        self.second_stage = True
-
-        # close mosaic augmentation
-        if self.train_loader.dataset.mosaic_prob > 0.:
-            print(' - Close < Mosaic Augmentation > ...')
-            self.train_loader.dataset.mosaic_prob = 0.
-            self.heavy_eval = True
-
-        # close mixup augmentation
-        if self.train_loader.dataset.mixup_prob > 0.:
-            print(' - Close < Mixup Augmentation > ...')
-            self.train_loader.dataset.mixup_prob = 0.
-            self.heavy_eval = True
-
-        # close rotation augmentation
-        if 'degrees' in self.trans_cfg.keys() and self.trans_cfg['degrees'] > 0.0:
-            print(' - Close < degrees of rotation > ...')
-            self.trans_cfg['degrees'] = 0.0
-        if 'shear' in self.trans_cfg.keys() and self.trans_cfg['shear'] > 0.0:
-            print(' - Close < shear of rotation > ...')
-            self.trans_cfg['shear'] = 0.0
-        if 'perspective' in self.trans_cfg.keys() and self.trans_cfg['perspective'] > 0.0:
-            print(' - Close < perspective of rotation > ...')
-            self.trans_cfg['perspective'] = 0.0
-
-        # build a new transform for second stage
-        print(' - Rebuild transforms ...')
-        self.train_transform, self.trans_cfg = build_transform(
-            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        self.train_loader.dataset.transform = self.train_transform
-        
-    def check_third_stage(self):
-        # set third stage
-        print('============== Third stage of Training ==============')
-        self.third_stage = True
 
-        # close random affine
-        if 'translate' in self.trans_cfg.keys() and self.trans_cfg['translate'] > 0.0:
-            print(' - Close < translate of affine > ...')
-            self.trans_cfg['translate'] = 0.0
-        if 'scale' in self.trans_cfg.keys():
-            print(' - Close < scale of affine >...')
-            self.trans_cfg['scale'] = [1.0, 1.0]
+        # Gather the stats from all processes
+        metric_logger.synchronize_between_processes()
+        print("Averaged stats:", metric_logger)
 
-        # build a new transform for second stage
-        print(' - Rebuild transforms ...')
-        self.train_transform, self.trans_cfg = build_transform(
-            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        self.train_loader.dataset.transform = self.train_transform
-        
     def refine_targets(self, targets, min_box_size):
         # rescale targets
         for tgt in targets:
@@ -733,8 +676,13 @@ class YoloxTrainer(object):
 
         # During training phase, the shape of input image is square.
         old_img_size = images.shape[-1]
-        new_img_size = random.randrange(old_img_size * multi_scale_range[0], old_img_size * multi_scale_range[1] + max_stride)
+        min_img_size = old_img_size * multi_scale_range[0]
+        max_img_size = old_img_size * multi_scale_range[1]
+
+        # Choose a new image size
+        new_img_size = random.randrange(min_img_size, max_img_size + max_stride, max_stride)
         new_img_size = new_img_size // max_stride * max_stride  # size
+
         if new_img_size / old_img_size != 1:
             # interpolate
             images = torch.nn.functional.interpolate(
@@ -760,746 +708,67 @@ class YoloxTrainer(object):
 
         return images, targets, new_img_size
 
-## Real-time Convolutional Object Detector Trainer
-class RTCTrainer(object):
-    def __init__(self, args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size):
-        # ------------------- basic parameters -------------------
-        self.args = args
-        self.epoch = 0
-        self.best_map = -1.
-        self.device = device
-        self.criterion = criterion
-        self.world_size = world_size
-        self.grad_accumulate = args.grad_accumulate
-        self.clip_grad = 35
-        self.heavy_eval = False
-        # weak augmentation stage
-        self.second_stage = False
-        self.third_stage = False
-        self.second_stage_epoch = args.no_aug_epoch
-        self.third_stage_epoch = args.no_aug_epoch // 2
-        # path to save model
-        self.path_to_save = os.path.join(args.save_folder, args.dataset, args.model)
-        os.makedirs(self.path_to_save, exist_ok=True)
+    def check_second_stage(self):
+        # set second stage
+        print('============== Second stage of Training ==============')
+        self.second_stage = True
 
-        # ---------------------------- Hyperparameters following RTMDet ----------------------------
-        self.optimizer_dict = {'optimizer': 'adamw', 'momentum': None, 'weight_decay': 5e-2, 'lr0': 0.001}
-        self.ema_dict = {'ema_decay': 0.9998, 'ema_tau': 2000}
-        self.lr_schedule_dict = {'scheduler': 'linear', 'lrf': 0.01}
-        self.warmup_dict = {'warmup_momentum': 0.8, 'warmup_bias_lr': 0.1}        
+        # close mosaic augmentation
+        if self.train_loader.dataset.mosaic_prob > 0.:
+            print(' - Close < Mosaic Augmentation > ...')
+            self.train_loader.dataset.mosaic_prob = 0.
+            self.heavy_eval = True
 
-        # ---------------------------- Build Dataset & Model & Trans. Config ----------------------------
-        self.data_cfg  = data_cfg
-        self.model_cfg = model_cfg
-        self.trans_cfg = trans_cfg
+        # close mixup augmentation
+        if self.train_loader.dataset.mixup_prob > 0.:
+            print(' - Close < Mixup Augmentation > ...')
+            self.train_loader.dataset.mixup_prob = 0.
+            self.heavy_eval = True
 
-        # ---------------------------- Build Transform ----------------------------
+        # close rotation augmentation
+        if 'degrees' in self.trans_cfg.keys() and self.trans_cfg['degrees'] > 0.0:
+            print(' - Close < degrees of rotation > ...')
+            self.trans_cfg['degrees'] = 0.0
+        if 'shear' in self.trans_cfg.keys() and self.trans_cfg['shear'] > 0.0:
+            print(' - Close < shear of rotation > ...')
+            self.trans_cfg['shear'] = 0.0
+        if 'perspective' in self.trans_cfg.keys() and self.trans_cfg['perspective'] > 0.0:
+            print(' - Close < perspective of rotation > ...')
+            self.trans_cfg['perspective'] = 0.0
+
+        # build a new transform for second stage
+        print(' - Rebuild transforms ...')
         self.train_transform, self.trans_cfg = build_transform(
-            args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        self.val_transform, _ = build_transform(
-            args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=False)
+            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
+        self.train_loader.dataset.transform = self.train_transform
+        
+    def check_third_stage(self):
+        # set third stage
+        print('============== Third stage of Training ==============')
+        self.third_stage = True
 
-        # ---------------------------- Build Dataset & Dataloader ----------------------------
-        self.dataset, self.dataset_info = build_dataset(args, self.data_cfg, self.trans_cfg, self.train_transform, is_train=True)
-        self.train_loader = build_dataloader(args, self.dataset, self.args.batch_size // self.world_size, CollateFunc())
-
-        # ---------------------------- Build Evaluator ----------------------------
-        self.evaluator = build_evluator(args, self.data_cfg, self.val_transform, self.device)
-
-        # ---------------------------- Build Grad. Scaler ----------------------------
-        self.scaler = torch.cuda.amp.GradScaler(enabled=self.args.fp16)
-
-        # ---------------------------- Build Optimizer ----------------------------
-        self.optimizer_dict['lr0'] *= args.batch_size * self.grad_accumulate / 64
-        self.optimizer, self.start_epoch = build_yolo_optimizer(self.optimizer_dict, model, args.resume)
-
-        # ---------------------------- Build LR Scheduler ----------------------------
-        self.lr_scheduler, self.lf = build_lambda_lr_scheduler(self.lr_schedule_dict, self.optimizer, args.max_epoch)
-        self.lr_scheduler.last_epoch = self.start_epoch - 1  # do not move
-        if self.args.resume and self.args.resume != 'None':
-            self.lr_scheduler.step()
-
-        # ---------------------------- Build Model-EMA ----------------------------
-        if self.args.ema and distributed_utils.get_rank() in [-1, 0]:
-            print('Build ModelEMA ...')
-            self.model_ema = ModelEMA(self.ema_dict, model, self.start_epoch * len(self.train_loader))
-        else:
-            self.model_ema = None
-
-    def train(self, model):
-        for epoch in range(self.start_epoch, self.args.max_epoch):
-            if self.args.distributed:
-                self.train_loader.batch_sampler.sampler.set_epoch(epoch)
-
-            # check second stage
-            if epoch >= (self.args.max_epoch - self.second_stage_epoch - 1) and not self.second_stage:
-                self.check_second_stage()
-                # save model of the last mosaic epoch
-                weight_name = '{}_last_mosaic_epoch.pth'.format(self.args.model)
-                checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                print('Saving state of the last Mosaic epoch-{}.'.format(self.epoch))
-                torch.save({'model': model.state_dict(),
-                            'mAP': round(self.evaluator.map*100, 1),
-                            'optimizer': self.optimizer.state_dict(),
-                            'epoch': self.epoch,
-                            'args': self.args}, 
-                            checkpoint_path)
-
-            # check third stage
-            if epoch >= (self.args.max_epoch - self.third_stage_epoch - 1) and not self.third_stage:
-                self.check_third_stage()
-                # save model of the last weak-augment epoch
-                weight_name = '{}_last_weak_augment_epoch.pth'.format(self.args.model)
-                checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                print('Saving state of the last weak augment epoch-{}.'.format(self.epoch))
-                torch.save({'model': model.state_dict(),
-                            'mAP': round(self.evaluator.map*100, 1),
-                            'optimizer': self.optimizer.state_dict(),
-                            'epoch': self.epoch,
-                            'args': self.args}, 
-                            checkpoint_path)
-
-            # train one epoch
-            self.epoch = epoch
-            self.train_one_epoch(model)
-
-            # eval one epoch
-            if self.heavy_eval:
-                model_eval = model.module if self.args.distributed else model
-                self.eval(model_eval)
-            else:
-                model_eval = model.module if self.args.distributed else model
-                if (epoch % self.args.eval_epoch) == 0 or (epoch == self.args.max_epoch - 1):
-                    self.eval(model_eval)
-
-            if self.args.debug:
-                print("For debug mode, we only train 1 epoch")
-                break
-
-    def eval(self, model):
-        # check model
-        model_eval = model if self.model_ema is None else self.model_ema.ema
-
-        if distributed_utils.is_main_process():
-            # check evaluator
-            if self.evaluator is None:
-                print('No evaluator ... save model and go on training.')
-                print('Saving state, epoch: {}'.format(self.epoch))
-                weight_name = '{}_no_eval.pth'.format(self.args.model)
-                checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                torch.save({'model': model_eval.state_dict(),
-                            'mAP': -1.,
-                            'optimizer': self.optimizer.state_dict(),
-                            'epoch': self.epoch,
-                            'args': self.args}, 
-                            checkpoint_path)               
-            else:
-                print('eval ...')
-                # set eval mode
-                model_eval.trainable = False
-                model_eval.eval()
-
-                # evaluate
-                with torch.no_grad():
-                    self.evaluator.evaluate(model_eval)
-
-                # save model
-                cur_map = self.evaluator.map
-                if cur_map > self.best_map:
-                    # update best-map
-                    self.best_map = cur_map
-                    # save model
-                    print('Saving state, epoch:', self.epoch)
-                    weight_name = '{}_best.pth'.format(self.args.model)
-                    checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                    torch.save({'model': model_eval.state_dict(),
-                                'mAP': round(self.best_map*100, 1),
-                                'optimizer': self.optimizer.state_dict(),
-                                'epoch': self.epoch,
-                                'args': self.args}, 
-                                checkpoint_path)                      
-
-                # set train mode.
-                model_eval.trainable = True
-                model_eval.train()
-
-        if self.args.distributed:
-            # wait for all processes to synchronize
-            dist.barrier()
-
-    def train_one_epoch(self, model):
-        metric_logger = MetricLogger(delimiter="  ")
-        metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
-        metric_logger.add_meter('size', SmoothedValue(window_size=1, fmt='{value:d}'))
-        metric_logger.add_meter('grad_norm', SmoothedValue(window_size=1, fmt='{value:.1f}'))
-        header = 'Epoch: [{} / {}]'.format(self.epoch, self.args.max_epoch)
-        epoch_size = len(self.train_loader)
-        print_freq = 10
-
-        # basic parameters
-        img_size = self.args.img_size
-        nw = epoch_size * self.args.wp_epoch
-
-        # Train one epoch
-        for iter_i, (images, targets) in enumerate(metric_logger.log_every(self.train_loader, print_freq, header)):
-            ni = iter_i + self.epoch * epoch_size
-            # Warmup
-            if ni <= nw:
-                xi = [0, nw]  # x interp
-                for j, x in enumerate(self.optimizer.param_groups):
-                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
-                    x['lr'] = np.interp(
-                        ni, xi, [self.warmup_dict['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * self.lf(self.epoch)])
-                    if 'momentum' in x:
-                        x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
-                                
-            # To device
-            images = images.to(self.device, non_blocking=True).float()
-
-            # Multi scale
-            if self.args.multi_scale:
-                images, targets, img_size = self.rescale_image_targets(
-                    images, targets, self.model_cfg['stride'], self.args.min_box_size, self.model_cfg['multi_scale'])
-            else:
-                targets = self.refine_targets(targets, self.args.min_box_size)
-                
-            # Visualize train targets
-            if self.args.vis_tgt:
-                vis_data(images*255, targets)
-
-            # Inference
-            with torch.cuda.amp.autocast(enabled=self.args.fp16):
-                outputs = model(images)
-                # Compute loss
-                loss_dict = self.criterion(outputs=outputs, targets=targets, epoch=self.epoch)
-                losses = loss_dict['losses']
-                # Grad Accumulate
-                if self.grad_accumulate > 1:
-                    losses /= self.grad_accumulate
-
-                loss_dict_reduced = distributed_utils.reduce_dict(loss_dict)
-
-            # Backward
-            self.scaler.scale(losses).backward()
-
-            # Optimize
-            if ni % self.grad_accumulate == 0:
-                grad_norm = None
-                if self.clip_grad > 0:
-                    # unscale gradients
-                    self.scaler.unscale_(self.optimizer)
-                    # clip gradients
-                    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=self.clip_grad)
-                # optimizer.step
-                self.scaler.step(self.optimizer)
-                self.scaler.update()
-                self.optimizer.zero_grad()
-                # ema
-                if self.model_ema is not None:
-                    self.model_ema.update(model)
-
-            # Update log
-            metric_logger.update(**loss_dict_reduced)
-            metric_logger.update(lr=self.optimizer.param_groups[2]["lr"])
-            metric_logger.update(grad_norm=grad_norm)
-            metric_logger.update(size=img_size)
-
-            if self.args.debug:
-                print("For debug mode, we only train 1 iteration")
-                break
-
-        # LR Schedule
-        if not self.second_stage:
-            self.lr_scheduler.step()
-
-        # Gather the stats from all processes
-        metric_logger.synchronize_between_processes()
-        print("Averaged stats:", metric_logger)
-
-    def refine_targets(self, targets, min_box_size):
-        # filter out targets smaller than min_box_size
-        for tgt in targets:
-            boxes = tgt["boxes"].clone()
-            labels = tgt["labels"].clone()
-            # refine tgt
-            tgt_boxes_wh = boxes[..., 2:] - boxes[..., :2]
-            min_tgt_size = torch.min(tgt_boxes_wh, dim=-1)[0]
-            keep = (min_tgt_size >= min_box_size)
-
-            tgt["boxes"] = boxes[keep]
-            tgt["labels"] = labels[keep]
-        
-        return targets
-
-    def rescale_image_targets(self, images, targets, stride, min_box_size, multi_scale_range=[0.5, 1.5]):
-        """
-            Deployed for Multi scale trick.
-        """
-        if isinstance(stride, int):
-            max_stride = stride
-        elif isinstance(stride, list):
-            max_stride = max(stride)
-
-        # During training phase, the shape of input image is square.
-        old_img_size = images.shape[-1]
-        # randrange needs integer bounds; the scale-range endpoints are floats
-        new_img_size = random.randrange(int(old_img_size * multi_scale_range[0]), int(old_img_size * multi_scale_range[1]) + max_stride)
-        new_img_size = new_img_size // max_stride * max_stride  # snap to a multiple of max stride
-        if new_img_size / old_img_size != 1:
-            # interpolate
-            images = torch.nn.functional.interpolate(
-                                input=images, 
-                                size=new_img_size, 
-                                mode='bilinear', 
-                                align_corners=False)
-        # rescale targets
-        for tgt in targets:
-            boxes = tgt["boxes"].clone()
-            labels = tgt["labels"].clone()
-            boxes = torch.clamp(boxes, 0, old_img_size)
-            # rescale box
-            boxes[:, [0, 2]] = boxes[:, [0, 2]] / old_img_size * new_img_size
-            boxes[:, [1, 3]] = boxes[:, [1, 3]] / old_img_size * new_img_size
-            # refine tgt
-            tgt_boxes_wh = boxes[..., 2:] - boxes[..., :2]
-            min_tgt_size = torch.min(tgt_boxes_wh, dim=-1)[0]
-            keep = (min_tgt_size >= min_box_size)
-
-            tgt["boxes"] = boxes[keep]
-            tgt["labels"] = labels[keep]
-
-        return images, targets, new_img_size
-
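
Aside for readers skimming the diff: `rescale_image_targets` is the classic multi-scale training trick — draw a random square size, snap it to a multiple of the maximum stride, bilinearly resize the batch, and scale the boxes by the same ratio. A minimal self-contained sketch of that logic (the 640 input size and stride 32 are illustrative assumptions, not values read from this repo):

import random
import torch
import torch.nn.functional as F

def random_rescale(images, boxes, max_stride=32, scale_range=(0.5, 1.5)):
    # Square inputs are assumed, as in the trainer above.
    old_size = images.shape[-1]
    new_size = random.randrange(int(old_size * scale_range[0]),
                                int(old_size * scale_range[1]) + max_stride)
    new_size = new_size // max_stride * max_stride  # snap to a stride multiple
    if new_size != old_size:
        images = F.interpolate(images, size=new_size, mode='bilinear',
                               align_corners=False)
        boxes = boxes * (new_size / old_size)  # xyxy boxes scale linearly
    return images, boxes, new_size

imgs, boxes, size = random_rescale(torch.zeros(2, 3, 640, 640),
                                   torch.tensor([[10., 20., 110., 220.]]))
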
-    def check_second_stage(self):
-        # set second stage
-        print('============== Second stage of Training ==============')
-        self.second_stage = True
-
-        # close mosaic augmentation
-        if self.train_loader.dataset.mosaic_prob > 0.:
-            print(' - Close < Mosaic Augmentation > ...')
-            self.train_loader.dataset.mosaic_prob = 0.
-            self.heavy_eval = True
-
-        # close mixup augmentation
-        if self.train_loader.dataset.mixup_prob > 0.:
-            print(' - Close < Mixup Augmentation > ...')
-            self.train_loader.dataset.mixup_prob = 0.
-            self.heavy_eval = True
-
-        # close rotation augmentation
-        if 'degrees' in self.trans_cfg.keys() and self.trans_cfg['degrees'] > 0.0:
-            print(' - Close < degrees of rotation > ...')
-            self.trans_cfg['degrees'] = 0.0
-        if 'shear' in self.trans_cfg.keys() and self.trans_cfg['shear'] > 0.0:
-            print(' - Close < shear > ...')
-            self.trans_cfg['shear'] = 0.0
-        if 'perspective' in self.trans_cfg.keys() and self.trans_cfg['perspective'] > 0.0:
-            print(' - Close < perspective > ...')
-            self.trans_cfg['perspective'] = 0.0
-
-        # build a new transform for second stage
-        print(' - Rebuild transforms ...')
-        self.train_transform, self.trans_cfg = build_transform(
-            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        self.train_loader.dataset.transform = self.train_transform
-        
-    def check_third_stage(self):
-        # set third stage
-        print('============== Third stage of Training ==============')
-        self.third_stage = True
-
-        # close random affine
-        if 'translate' in self.trans_cfg.keys() and self.trans_cfg['translate'] > 0.0:
-            print(' - Close < translate of affine > ...')
-            self.trans_cfg['translate'] = 0.0
-        if 'scale' in self.trans_cfg.keys():
-            print(' - Close < scale of affine > ...')
-            self.trans_cfg['scale'] = [1.0, 1.0]
-
-        # build a new transform for the third stage
-        print(' - Rebuild transforms ...')
-        self.train_transform, self.trans_cfg = build_transform(
-            args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        self.train_loader.dataset.transform = self.train_transform
-
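
The two `check_*_stage` hooks fire a fixed number of epochs before the end of training: mosaic/mixup and rotation shut off first, then translation and scaling. A small sketch of the schedule arithmetic (300/20/10 are example values, not the repo defaults):

def training_stage(epoch, max_epoch, second=20, third=10):
    # Mirrors the checks in train(); 20/10 are illustrative stage lengths.
    if epoch >= max_epoch - third - 1:
        return 'third'   # strong and weak geometric augmentations both off
    if epoch >= max_epoch - second - 1:
        return 'second'  # mosaic / mixup / rotation off
    return 'first'

assert training_stage(278, 300) == 'first'
assert training_stage(279, 300) == 'second'
assert training_stage(289, 300) == 'third'
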
-## Real-time DETR Trainer
-class RTDetrTrainer(object):
-    def __init__(self, args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size):
-        # ------------------- Basic parameters -------------------
-        self.args = args
-        self.epoch = 0
-        self.best_map = -1.
-        self.device = device
-        self.criterion = criterion
-        self.world_size = world_size
-        self.grad_accumulate = args.grad_accumulate
-        self.clip_grad = 0.1
-        self.heavy_eval = False
-        self.normalize_bbox = True
-        # close AMP for RT-DETR
-        self.args.fp16 = False
-        # weak augmentation stage
-        self.second_stage = False
-        self.second_stage_epoch = -1
-        # path to save model
-        self.path_to_save = os.path.join(args.save_folder, args.dataset, args.model)
-        os.makedirs(self.path_to_save, exist_ok=True)
-
-        # ---------------------------- Hyperparameters refer to RTMDet ----------------------------
-        self.optimizer_dict = {'optimizer': 'adamw', 'momentum': None, 'weight_decay': 0.0001, 'lr0': 0.0001, 'backbone_lr_ratio': 0.1}
-        self.warmup_dict = {'warmup': 'linear', 'warmup_iters': 2000, 'warmup_factor': 0.00066667}
-        self.lr_schedule_dict = {'lr_scheduler': 'step', 'lr_epoch': [self.args.max_epoch // 12 * 11]}
-        self.ema_dict = {'ema_decay': 0.9999, 'ema_tau': 2000}
-
-        # ---------------------------- Build Dataset & Model & Trans. Config ----------------------------
-        self.data_cfg  = data_cfg
-        self.model_cfg = model_cfg
-        self.trans_cfg = trans_cfg
-
-        # ---------------------------- Build Transform ----------------------------
-        self.train_transform, self.trans_cfg = build_transform(
-            args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        self.val_transform, _ = build_transform(
-            args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=False)
-        if self.trans_cfg["mosaic_prob"] > 0:
-            self.second_stage_epoch = 5
-
-        # ---------------------------- Build Dataset & Dataloader ----------------------------
-        self.dataset, self.dataset_info = build_dataset(args, self.data_cfg, self.trans_cfg, self.train_transform, is_train=True)
-        self.train_loader = build_dataloader(args, self.dataset, self.args.batch_size // self.world_size, CollateFunc())
-
-        # ---------------------------- Build Evaluator ----------------------------
-        self.evaluator = build_evluator(args, self.data_cfg, self.val_transform, self.device)
-
-        # ---------------------------- Build Grad. Scaler ----------------------------
-        self.scaler = torch.cuda.amp.GradScaler(enabled=self.args.fp16)
-
-        # ---------------------------- Build Optimizer ----------------------------
-        self.optimizer_dict['lr0'] *= self.args.batch_size / 16.  # auto lr scaling
-        self.optimizer, self.start_epoch = build_rtdetr_optimizer(self.optimizer_dict, model, self.args.resume)
-
-        # ---------------------------- Build LR Scheduler ----------------------------
-        self.wp_lr_scheduler = build_wp_lr_scheduler(self.warmup_dict, self.optimizer_dict['lr0'])
-        self.lr_scheduler    = build_lr_scheduler(self.lr_schedule_dict, self.optimizer, args.resume)
-
-        # ---------------------------- Build Model-EMA ----------------------------
-        if self.args.ema and distributed_utils.get_rank() in [-1, 0]:
-            print('Build ModelEMA ...')
-            self.model_ema = ModelEMA(self.ema_dict, model, self.start_epoch * len(self.train_loader))
-        else:
-            self.model_ema = None
-
-    def train(self, model):
-        for epoch in range(self.start_epoch, self.args.max_epoch):
-            if self.args.distributed:
-                self.train_loader.batch_sampler.sampler.set_epoch(epoch)
-
-            # check second stage
-            if epoch >= (self.args.max_epoch - self.second_stage_epoch - 1) and not self.second_stage:
-                self.check_second_stage()
-                # save model of the last mosaic epoch
-                weight_name = '{}_last_mosaic_epoch.pth'.format(self.args.model)
-                checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                print('Saving state of the last Mosaic epoch-{}.'.format(self.epoch))
-                torch.save({'model': model.state_dict(),
-                            'mAP': round(self.evaluator.map*100, 1),
-                            'optimizer': self.optimizer.state_dict(),
-                            'epoch': self.epoch,
-                            'args': self.args}, 
-                            checkpoint_path)
-
-            # train one epoch
-            self.epoch = epoch
-            self.train_one_epoch(model)
-
-            # eval one epoch
-            if self.heavy_eval:
-                model_eval = model.module if self.args.distributed else model
-                self.eval(model_eval)
-            else:
-                model_eval = model.module if self.args.distributed else model
-                if (epoch % self.args.eval_epoch) == 0 or (epoch == self.args.max_epoch - 1):
-                    self.eval(model_eval)
-
-            if self.args.debug:
-                print("For debug mode, we only train 1 epoch")
-                break
-
-    def eval(self, model):
-        # check model (use the EMA weights if available)
-        model_eval = model if self.model_ema is None else self.model_ema.ema
-
-        if distributed_utils.is_main_process():
-            # check evaluator
-            if self.evaluator is None:
-                print('No evaluator ... save model and go on training.')
-                print('Saving state, epoch: {}'.format(self.epoch))
-                weight_name = '{}_no_eval.pth'.format(self.args.model)
-                checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                torch.save({'model': model_eval.state_dict(),
-                            'mAP': -1.,
-                            'optimizer': self.optimizer.state_dict(),
-                            'epoch': self.epoch,
-                            'args': self.args}, 
-                            checkpoint_path)               
-            else:
-                print('eval ...')
-                # set eval mode
-                model_eval.eval()
-
-                # evaluate
-                with torch.no_grad():
-                    self.evaluator.evaluate(model_eval)
-
-                # save model
-                cur_map = self.evaluator.map
-                if cur_map > self.best_map:
-                    # update best-map
-                    self.best_map = cur_map
-                    # save model
-                    print('Saving state, epoch:', self.epoch)
-                    weight_name = '{}_best.pth'.format(self.args.model)
-                    checkpoint_path = os.path.join(self.path_to_save, weight_name)
-                    torch.save({'model': model_eval.state_dict(),
-                                'mAP': round(self.best_map*100, 1),
-                                'optimizer': self.optimizer.state_dict(),
-                                'epoch': self.epoch,
-                                'args': self.args}, 
-                                checkpoint_path)                      
-
-                # set train mode.
-                model_eval.train()
-
-        if self.args.distributed:
-            # wait for all processes to synchronize
-            dist.barrier()
-
-    def train_one_epoch(self, model):
-        metric_logger = MetricLogger(delimiter="  ")
-        metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
-        metric_logger.add_meter('size', SmoothedValue(window_size=1, fmt='{value:d}'))
-        metric_logger.add_meter('grad_norm', SmoothedValue(window_size=1, fmt='{value:.1f}'))
-        header = 'Epoch: [{} / {}]'.format(self.epoch, self.args.max_epoch)
-        epoch_size = len(self.train_loader)
-        print_freq = 10
-
-        # basic parameters
-        img_size = self.args.img_size
-        nw = self.warmup_dict['warmup_iters']
-        lr_warmup_stage = True
-
-        # Train one epoch
-        for iter_i, (images, targets) in enumerate(metric_logger.log_every(self.train_loader, print_freq, header)):
-            ni = iter_i + self.epoch * epoch_size
-            # WarmUp
-            if ni < nw and lr_warmup_stage:
-                self.wp_lr_scheduler(ni, self.optimizer)
-            elif ni == nw and lr_warmup_stage:
-                print('Warmup stage is over.')
-                lr_warmup_stage = False
-                self.wp_lr_scheduler.set_lr(self.optimizer, self.optimizer_dict['lr0'], self.optimizer_dict['lr0'])
-                                            
-            # To device
-            images = images.to(self.device, non_blocking=True).float()
-            for tgt in targets:
-                tgt['boxes'] = tgt['boxes'].to(self.device)
-                tgt['labels'] = tgt['labels'].to(self.device)
-
-            # Multi scale
-            if self.args.multi_scale:
-                images, targets, img_size = self.rescale_image_targets(
-                    images, targets, self.model_cfg['max_stride'], self.args.min_box_size, self.model_cfg['multi_scale'])
-            else:
-                targets = self.refine_targets(img_size, targets, self.args.min_box_size)
-
-            # xyxy -> cxcywh
-            targets = self.box_xyxy_to_cxcywh(targets)
-                
-            # Visualize train targets
-            if self.args.vis_tgt:
-                targets = self.box_cxcywh_to_xyxy(targets)
-                vis_data(images, targets, normalized_bbox=self.normalize_bbox,
-                         pixel_mean=self.trans_cfg['pixel_mean'], pixel_std=self.trans_cfg['pixel_std'])
-                targets = self.box_xyxy_to_cxcywh(targets)
-
-            # Inference
-            with torch.cuda.amp.autocast(enabled=self.args.fp16):
-                outputs = model(images, targets)    
-                loss_dict = self.criterion(outputs, targets)
-                losses = sum(loss_dict.values())
-                # Grad Accumulate
-                if self.grad_accumulate > 1:
-                    losses /= self.grad_accumulate
-                loss_dict_reduced = distributed_utils.reduce_dict(loss_dict)
-
-            # Backward
-            self.scaler.scale(losses).backward()
-
-            # Optimize
-            if ni % self.grad_accumulate == 0:
-                grad_norm = None
-                if self.clip_grad > 0:
-                    # unscale gradients
-                    self.scaler.unscale_(self.optimizer)
-                    # clip gradients
-                    grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=self.clip_grad)
-                # optimizer.step
-                self.scaler.step(self.optimizer)
-                self.scaler.update()
-                self.optimizer.zero_grad()
-                # ema
-                if self.model_ema is not None:
-                    self.model_ema.update(model)
-
-            # Update log
-            metric_logger.update(loss=losses.item(), **loss_dict_reduced)
-            metric_logger.update(lr=self.optimizer.param_groups[0]["lr"])
-            metric_logger.update(grad_norm=grad_norm)
-            metric_logger.update(size=img_size)
-
-            if self.args.debug:
-                print("For debug mode, we only train 1 iteration")
-                break
-    
-        # LR Scheduler
-        self.lr_scheduler.step()
-        
-    def refine_targets(self, img_size, targets, min_box_size):
-        # filter out tiny targets and optionally normalize the boxes
-        for tgt in targets:
-            boxes = tgt["boxes"].clone()
-            labels = tgt["labels"].clone()
-            # refine tgt
-            tgt_boxes_wh = boxes[..., 2:] - boxes[..., :2]
-            min_tgt_size = torch.min(tgt_boxes_wh, dim=-1)[0]
-            keep = (min_tgt_size >= min_box_size)
-            if self.normalize_bbox:
-                # normalize box
-                boxes[:, [0, 2]] = boxes[:, [0, 2]] / img_size
-                boxes[:, [1, 3]] = boxes[:, [1, 3]] / img_size
-
-            tgt["boxes"] = boxes[keep]
-            tgt["labels"] = labels[keep]
-        
-        return targets
-
-    def rescale_image_targets(self, images, targets, stride, min_box_size, multi_scale_range=[0.5, 1.5]):
-        """
-            Deployed for Multi scale trick.
-        """
-        if isinstance(stride, int):
-            max_stride = stride
-        elif isinstance(stride, list):
-            max_stride = max(stride)
-
-        # During training phase, the shape of input image is square.
-        old_img_size = images.shape[-1]
-        # randrange needs integer bounds; the scale-range endpoints are floats
-        new_img_size = random.randrange(int(old_img_size * multi_scale_range[0]), int(old_img_size * multi_scale_range[1]) + max_stride)
-        new_img_size = new_img_size // max_stride * max_stride  # snap to a multiple of max stride
-        if new_img_size / old_img_size != 1:
-            # interpolate
-            images = torch.nn.functional.interpolate(
-                                input=images, 
-                                size=new_img_size, 
-                                mode='bilinear', 
-                                align_corners=False)
-        # rescale targets
-        for tgt in targets:
-            boxes = tgt["boxes"].clone()
-            labels = tgt["labels"].clone()
-            boxes = torch.clamp(boxes, 0, old_img_size)
-            # rescale box
-            boxes[:, [0, 2]] = boxes[:, [0, 2]] / old_img_size * new_img_size
-            boxes[:, [1, 3]] = boxes[:, [1, 3]] / old_img_size * new_img_size
-            # refine tgt
-            tgt_boxes_wh = boxes[..., 2:] - boxes[..., :2]
-            min_tgt_size = torch.min(tgt_boxes_wh, dim=-1)[0]
-            keep = (min_tgt_size >= min_box_size)
-            if self.normalize_bbox:
-                # normalize box
-                boxes[:, [0, 2]] = boxes[:, [0, 2]] / new_img_size
-                boxes[:, [1, 3]] = boxes[:, [1, 3]] / new_img_size
-
-            tgt["boxes"] = boxes[keep]
-            tgt["labels"] = labels[keep]
-
-        return images, targets, new_img_size
-
-    def box_xyxy_to_cxcywh(self, targets):
-        # convert target boxes from xyxy to cxcywh
-        for tgt in targets:
-            boxes_xyxy = tgt["boxes"].clone()
-            # (x1, y1, x2, y2) -> (cx, cy, w, h)
-            cxcy = (boxes_xyxy[..., :2] + boxes_xyxy[..., 2:]) * 0.5
-            bwbh = boxes_xyxy[..., 2:] - boxes_xyxy[..., :2]
-            boxes_cxcywh = torch.cat([cxcy, bwbh], dim=-1)
-
-            tgt["boxes"] = boxes_cxcywh
-
-        return targets
-
-    def box_cxcywh_to_xyxy(self, targets):
-        # convert target boxes from cxcywh back to xyxy
-        for tgt in targets:
-            boxes_cxcywh = tgt["boxes"].clone()
-            # (cx, cy, w, h) -> (x1, y1, x2, y2)
-            x1y1 = boxes_cxcywh[..., :2] - boxes_cxcywh[..., 2:] * 0.5
-            x2y2 = boxes_cxcywh[..., :2] + boxes_cxcywh[..., 2:] * 0.5
-            boxes_xyxy = torch.cat([x1y1, x2y2], dim=-1)
-
-            tgt["boxes"] = boxes_xyxy
-
-        return targets
-
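
The two helpers above are pure box-format conversions and exact inverses of each other; a quick round-trip sanity check on plain tensors:

import torch

def xyxy_to_cxcywh(b):
    # (x1, y1, x2, y2) -> (cx, cy, w, h)
    return torch.cat([(b[..., :2] + b[..., 2:]) * 0.5,
                      b[..., 2:] - b[..., :2]], dim=-1)

def cxcywh_to_xyxy(b):
    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    return torch.cat([b[..., :2] - b[..., 2:] * 0.5,
                      b[..., :2] + b[..., 2:] * 0.5], dim=-1)

b = torch.tensor([[10., 20., 110., 220.]])
assert torch.allclose(cxcywh_to_xyxy(xyxy_to_cxcywh(b)), b)
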
-    def check_second_stage(self):
-        # set second stage
-        print('============== Second stage of Training ==============')
-        self.second_stage = True
-
-        # close mosaic augmentation
-        if self.train_loader.dataset.mosaic_prob > 0.:
-            print(' - Close < Mosaic Augmentation > ...')
-            self.train_loader.dataset.mosaic_prob = 0.
-            self.heavy_eval = True
-
-        # close mixup augmentation
-        if self.train_loader.dataset.mixup_prob > 0.:
-            print(' - Close < Mixup Augmentation > ...')
-            self.train_loader.dataset.mixup_prob = 0.
-            self.heavy_eval = True
-
-        # close rotation augmentation
-        if 'degrees' in self.trans_cfg.keys() and self.trans_cfg['degrees'] > 0.0:
-            print(' - Close < degrees of rotation > ...')
-            self.trans_cfg['degrees'] = 0.0
-        if 'shear' in self.trans_cfg.keys() and self.trans_cfg['shear'] > 0.0:
-            print(' - Close < shear > ...')
-            self.trans_cfg['shear'] = 0.0
-        if 'perspective' in self.trans_cfg.keys() and self.trans_cfg['perspective'] > 0.0:
-            print(' - Close < perspective > ...')
-            self.trans_cfg['perspective'] = 0.0
+        # close random affine
+        if 'translate' in self.trans_cfg.keys() and self.trans_cfg['translate'] > 0.0:
+            print(' - Close < translate of affine > ...')
+            self.trans_cfg['translate'] = 0.0
+        if 'scale' in self.trans_cfg.keys():
+            print(' - Close < scale of affine > ...')
+            self.trans_cfg['scale'] = [1.0, 1.0]
 
         # build a new transform for second stage
         print(' - Rebuild transforms ...')
         self.train_transform, self.trans_cfg = build_transform(
             args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
-        
         self.train_loader.dataset.transform = self.train_transform
 
 
 # Build Trainer
 def build_trainer(args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size):
     # ----------------------- Det trainers -----------------------
-    if   model_cfg['trainer_type'] == 'yolov8':
-        return Yolov8Trainer(args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size)
+    if   model_cfg['trainer_type'] == 'yolo':
+        return RTCTrainer(args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size)
     elif model_cfg['trainer_type'] == 'yolox':
         return YoloxTrainer(args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size)
-    elif model_cfg['trainer_type'] == 'rtcdet':
-        return RTCTrainer(args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size)
-    elif model_cfg['trainer_type'] == 'rtdetr':
-        return RTDetrTrainer(args, data_cfg, model_cfg, trans_cfg, device, model, criterion, world_size)
     else:
         raise NotImplementedError(model_cfg['trainer_type'])
     

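One note on the warmup logic in the trainers above: the YOLO-style branch interpolates every param group's lr with `np.interp` over the first `wp_epoch` epochs — the bias group decays from `warmup_bias_lr` down to the scheduled lr while all other groups rise from zero. A minimal restatement of that curve (the 0.1 and 0.01 values are illustrative):

import numpy as np

def warmup_lr(ni, nw, group_idx, scheduled_lr, warmup_bias_lr=0.1):
    # Bias group (index 0) falls from warmup_bias_lr; others rise from 0.
    start = warmup_bias_lr if group_idx == 0 else 0.0
    return float(np.interp(ni, [0, nw], [start, scheduled_lr]))

print(warmup_lr(0, 1000, 0, 0.01))    # 0.1   (bias group, first iteration)
print(warmup_lr(500, 1000, 2, 0.01))  # 0.005 (other groups, halfway point)
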
+ 1 - 53
utils/solver/optimizer.py

@@ -2,7 +2,7 @@ import torch
 import torch.nn as nn
 
 
-def build_yolo_optimizer(cfg, model, resume=None):
+def build_optimizer(cfg, model, resume=None):
     print('==============================')
     print('Optimizer: {}'.format(cfg['optimizer']))
     print('--base lr: {}'.format(cfg['lr0']))
@@ -42,55 +42,3 @@ def build_yolo_optimizer(cfg, model, resume=None):
         del checkpoint, checkpoint_state_dict
                                                         
     return optimizer, start_epoch
-
-
-def build_rtdetr_optimizer(cfg, model, resume=None):
-    print('==============================')
-    print('Optimizer: {}'.format(cfg['optimizer']))
-    print('--base lr: {}'.format(cfg['lr0']))
-    print('--weight_decay: {}'.format(cfg['weight_decay']))
-
-    # ------------- Divide model's parameters -------------
-    param_dicts = [], [], [], [], [], []
-    norm_names = ["norm"] + ["norm{}".format(i) for i in range(10000)]
-    for n, p in model.named_parameters():
-        # Non-Backbone's learnable parameters
-        if "backbone" not in n and p.requires_grad:
-            if "bias" == n.split(".")[-1]:
-                param_dicts[0].append(p)      # no weight decay for all layers' bias
-            else:
-                if n.split(".")[-2] in norm_names:
-                    param_dicts[1].append(p)  # no weight decay for all NormLayers' weight
-                else:
-                    param_dicts[2].append(p)  # weight decay for all Non-NormLayers' weight
-        # Backbone's learnable parameters
-        elif "backbone" in n and p.requires_grad:
-            if "bias" == n.split(".")[-1]:
-                param_dicts[3].append(p)      # no weight decay for all layers' bias
-            else:
-                if n.split(".")[-2] in norm_names:
-                    param_dicts[4].append(p)  # no weight decay for all NormLayers' weight
-                else:
-                    param_dicts[5].append(p)  # weight decay for all Non-NormLayers' weight
-
-    # Non-Backbone's learnable parameters
-    optimizer = torch.optim.AdamW(param_dicts[0], lr=cfg['lr0'], weight_decay=0.0)
-    optimizer.add_param_group({"params": param_dicts[1], "weight_decay": 0.0})
-    optimizer.add_param_group({"params": param_dicts[2], "weight_decay": cfg['weight_decay']})
-
-    # Backbone's learnable parameters
-    backbone_lr = cfg['lr0'] * cfg['backbone_lr_ratio']
-    optimizer.add_param_group({"params": param_dicts[3], "lr": backbone_lr, "weight_decay": 0.0})
-    optimizer.add_param_group({"params": param_dicts[4], "lr": backbone_lr, "weight_decay": 0.0})
-    optimizer.add_param_group({"params": param_dicts[5], "lr": backbone_lr, "weight_decay": cfg['weight_decay']})
-
-    start_epoch = 0
-    if resume and resume != 'None':
-        print('keep training: ', resume)
-        checkpoint = torch.load(resume)
-        # checkpoint state dict
-        checkpoint_state_dict = checkpoint.pop("optimizer")
-        optimizer.load_state_dict(checkpoint_state_dict)
-        start_epoch = checkpoint.pop("epoch") + 1
-                                                        
-    return optimizer, start_epoch
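
The deleted `build_rtdetr_optimizer` split parameters so that biases and normalization weights receive no weight decay, with a reduced lr for the backbone. A condensed sketch of the same grouping (a hypothetical rewrite, not the repo's code; `p.ndim <= 1` stands in for the original norm-name matching — every bias and norm weight has at most one dimension, so both land in the no-decay groups):

import torch

def make_adamw(model, lr0=1e-4, weight_decay=1e-4, backbone_lr_ratio=0.1):
    # Bucket parameters by (backbone?, decay?); empty groups are harmless.
    groups = {k: [] for k in ('decay', 'no_decay', 'bb_decay', 'bb_no_decay')}
    for n, p in model.named_parameters():
        if not p.requires_grad:
            continue
        key = ('bb_' if 'backbone' in n else '') + \
              ('no_decay' if p.ndim <= 1 else 'decay')
        groups[key].append(p)
    bb_lr = lr0 * backbone_lr_ratio
    return torch.optim.AdamW([
        {'params': groups['decay'],       'weight_decay': weight_decay},
        {'params': groups['no_decay'],    'weight_decay': 0.0},
        {'params': groups['bb_decay'],    'lr': bb_lr, 'weight_decay': weight_decay},
        {'params': groups['bb_no_decay'], 'lr': bb_lr, 'weight_decay': 0.0},
    ], lr=lr0)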