1 yıl önce · 3ddb801333
--- a/config/__init__.py
+++ b/config/__init__.py
@@ -33,9 +33,8 @@ from .data_config.transform_config import (
 
				     # SSD-Style
			
 
				     ssd_trans_config,
			
 
				     # RT-DETR style
			
 
				-    rtdetr_base_trans_config,
			
 
				+    rtdetr_s_trans_config,
			
 
				     rtdetr_l_trans_config,
			
 
				-    rtdetr_x_trans_config
			
 
				 )
			
 
				 
			
 
				 def build_trans_config(trans_config='ssd'):
			
@@ -75,12 +74,10 @@ def build_trans_config(trans_config='ssd'):
 
				         cfg = yolox_x_trans_config
			
 
				 
			
 
				     # RT-DETR style
			
 
				-    elif trans_config == 'rtdetr_base':
			
 
				-        cfg = rtdetr_base_trans_config
			
 
				+    elif trans_config == 'rtdetr_s':
			
 
				+        cfg = rtdetr_s_trans_config
			
 
				     elif trans_config == 'rtdetr_l':
			
 
				         cfg = rtdetr_l_trans_config
			
 
				-    elif trans_config == 'rtdetr_x':
			
 
				-        cfg = rtdetr_x_trans_config
			
 
				 
			
 
				     print('Transform Config: {} \n'.format(cfg))
			
 
				 
			
--- a/config/data_config/transform_config.py
+++ b/config/data_config/transform_config.py
@@ -4,256 +4,280 @@
 
				 # ----------------------- YOLOv5-Style Transform -----------------------
			
 
				 yolov5_x_trans_config = {
			
 
				     'aug_type': 'yolov5',
			
 
				-    # Basic Augment
			
 
				-    'degrees': 0.0,
			
 
				-    'translate': 0.2,
			
 
				-    'scale': [0.1, 2.0],
			
 
				-    'shear': 0.0,
			
 
				-    'perspective': 0.0,
			
 
				-    'hsv_h': 0.015,
			
 
				-    'hsv_s': 0.7,
			
 
				-    'hsv_v': 0.4,
			
 
				     'use_ablu': True,
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 0.0,
			
 
				+        'translate': 0.2,
			
 
				+        'scale': [0.1, 2.0],
			
 
				+        'shear': 0.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				     # Mosaic & Mixup
			
 
				-    'mosaic_prob': 1.0,
			
 
				-    'mixup_prob': 0.2,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolov5_mixup',
			
 
				     'mosaic_keep_ratio': True,
			
 
				-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
			
 
				+    'mosaic_prob': 1.0,
			
 
				+    'mixup_prob':  0.2,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolov5',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 yolov5_l_trans_config = {
			
 
				     'aug_type': 'yolov5',
			
 
				-    # Basic Augment
			
 
				-    'degrees': 0.0,
			
 
				-    'translate': 0.2,
			
 
				-    'scale': [0.1, 2.0],
			
 
				-    'shear': 0.0,
			
 
				-    'perspective': 0.0,
			
 
				-    'hsv_h': 0.015,
			
 
				-    'hsv_s': 0.7,
			
 
				-    'hsv_v': 0.4,
			
 
				     'use_ablu': True,
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 0.0,
			
 
				+        'translate': 0.2,
			
 
				+        'scale': [0.1, 2.0],
			
 
				+        'shear': 0.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				     # Mosaic & Mixup
			
 
				-    'mosaic_prob': 1.0,
			
 
				-    'mixup_prob': 0.15,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolov5_mixup',
			
 
				     'mosaic_keep_ratio': True,
			
 
				-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
			
 
				+    'mosaic_prob': 1.0,
			
 
				+    'mixup_prob':  0.15,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolov5',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 yolov5_m_trans_config = {
			
 
				     'aug_type': 'yolov5',
			
 
				-    # Basic Augment
			
 
				-    'degrees': 0.0,
			
 
				-    'translate': 0.2,
			
 
				-    'scale': [0.1, 2.0],
			
 
				-    'shear': 0.0,
			
 
				-    'perspective': 0.0,
			
 
				-    'hsv_h': 0.015,
			
 
				-    'hsv_s': 0.7,
			
 
				-    'hsv_v': 0.4,
			
 
				     'use_ablu': True,
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 0.0,
			
 
				+        'translate': 0.2,
			
 
				+        'scale': [0.1, 2.0],
			
 
				+        'shear': 0.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				     # Mosaic & Mixup
			
 
				-    'mosaic_prob': 1.0,
			
 
				-    'mixup_prob': 0.10,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolov5_mixup',
			
 
				     'mosaic_keep_ratio': True,
			
 
				-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
			
 
				+    'mosaic_prob': 1.0,
			
 
				+    'mixup_prob':  0.10,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolov5',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 yolov5_s_trans_config = {
			
 
				     'aug_type': 'yolov5',
			
 
				-    # Basic Augment
			
 
				-    'degrees': 0.0,
			
 
				-    'translate': 0.2,
			
 
				-    'scale': [0.1, 2.0],
			
 
				-    'shear': 0.0,
			
 
				-    'perspective': 0.0,
			
 
				-    'hsv_h': 0.015,
			
 
				-    'hsv_s': 0.7,
			
 
				-    'hsv_v': 0.4,
			
 
				     'use_ablu': True,
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 0.0,
			
 
				+        'translate': 0.2,
			
 
				+        'scale': [0.1, 2.0],
			
 
				+        'shear': 0.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				     # Mosaic & Mixup
			
 
				-    'mosaic_prob': 1.0,
			
 
				-    'mixup_prob': 0.0,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolov5_mixup',
			
 
				     'mosaic_keep_ratio': True,
			
 
				-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
			
 
				+    'mosaic_prob': 1.0,
			
 
				+    'mixup_prob':  0.0,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolov5',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 yolov5_n_trans_config = {
			
 
				     'aug_type': 'yolov5',
			
 
				-    # Basic Augment
			
 
				-    'degrees': 0.0,
			
 
				-    'translate': 0.1,
			
 
				-    'scale': [0.5, 1.5],
			
 
				-    'shear': 0.0,
			
 
				-    'perspective': 0.0,
			
 
				-    'hsv_h': 0.015,
			
 
				-    'hsv_s': 0.7,
			
 
				-    'hsv_v': 0.4,
			
 
				     'use_ablu': True,
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 0.0,
			
 
				+        'translate': 0.1,
			
 
				+        'scale': [0.5, 1.5],
			
 
				+        'shear': 0.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				     # Mosaic & Mixup
			
 
				-    'mosaic_prob': 1.0,
			
 
				-    'mixup_prob': 0.0,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolov5_mixup',
			
 
				     'mosaic_keep_ratio': True,
			
 
				-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
			
 
				+    'mosaic_prob': 1.0,
			
 
				+    'mixup_prob':  0.0,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolov5',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 yolov5_p_trans_config = {
			
 
				     'aug_type': 'yolov5',
			
 
				-    # Basic Augment
			
 
				-    'degrees': 0.0,
			
 
				-    'translate': 0.1,
			
 
				-    'scale': [0.5, 1.5],
			
 
				-    'shear': 0.0,
			
 
				-    'perspective': 0.0,
			
 
				-    'hsv_h': 0.015,
			
 
				-    'hsv_s': 0.7,
			
 
				-    'hsv_v': 0.4,
			
 
				     'use_ablu': True,
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 0.0,
			
 
				+        'translate': 0.1,
			
 
				+        'scale': [0.5, 1.5],
			
 
				+        'shear': 0.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				     # Mosaic & Mixup
			
 
				-    'mosaic_prob': 0.5,
			
 
				-    'mixup_prob': 0.0,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolov5_mixup',
			
 
				     'mosaic_keep_ratio': True,
			
 
				-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
			
 
				+    'mosaic_prob': 0.5,
			
 
				+    'mixup_prob':  0.0,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolov5',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 
			
 
				 # ----------------------- YOLOX-Style Transform -----------------------
			
 
				 yolox_x_trans_config = {
			
 
				     'aug_type': 'yolov5',
			
 
				-    # Basic Augment
			
 
				-    'degrees': 10.0,
			
 
				-    'translate': 0.1,
			
 
				-    'scale': [0.1, 2.0],
			
 
				-    'shear': 2.0,
			
 
				-    'perspective': 0.0,
			
 
				-    'hsv_h': 0.015,
			
 
				-    'hsv_s': 0.7,
			
 
				-    'hsv_v': 0.4,
			
 
				     'use_ablu': False,
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 10.0,
			
 
				+        'translate': 0.1,
			
 
				+        'scale': [0.1, 2.0],
			
 
				+        'shear': 2.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				     # Mosaic & Mixup
			
 
				-    'mosaic_prob': 1.0,
			
 
				-    'mixup_prob': 1.0,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolox_mixup',
			
 
				     'mosaic_keep_ratio': True,
			
 
				-    'mixup_scale': [0.5, 1.5]
			
 
				+    'mosaic_prob': 1.0,
			
 
				+    'mixup_prob':  1.0,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolox',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 yolox_l_trans_config = {
			
 
				     'aug_type': 'yolov5',
			
 
				-    # Basic Augment
			
 
				-    'degrees': 10.0,
			
 
				-    'translate': 0.1,
			
 
				-    'scale': [0.1, 2.0],
			
 
				-    'shear': 2.0,
			
 
				-    'perspective': 0.0,
			
 
				-    'hsv_h': 0.015,
			
 
				-    'hsv_s': 0.7,
			
 
				-    'hsv_v': 0.4,
			
 
				     'use_ablu': False,
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 10.0,
			
 
				+        'translate': 0.1,
			
 
				+        'scale': [0.1, 2.0],
			
 
				+        'shear': 2.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				     # Mosaic & Mixup
			
 
				-    'mosaic_prob': 1.0,
			
 
				-    'mixup_prob': 1.0,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolox_mixup',
			
 
				     'mosaic_keep_ratio': True,
			
 
				-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
			
 
				+    'mosaic_prob': 1.0,
			
 
				+    'mixup_prob':  1.0,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolox',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 yolox_m_trans_config = {
			
 
				     'aug_type': 'yolov5',
			
 
				-    # Basic Augment
			
 
				-    'degrees': 10.0,
			
 
				-    'translate': 0.1,
			
 
				-    'scale': [0.1, 2.0],
			
 
				-    'shear': 2.0,
			
 
				-    'perspective': 0.0,
			
 
				-    'hsv_h': 0.015,
			
 
				-    'hsv_s': 0.7,
			
 
				-    'hsv_v': 0.4,
			
 
				     'use_ablu': False,
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 10.0,
			
 
				+        'translate': 0.1,
			
 
				+        'scale': [0.1, 2.0],
			
 
				+        'shear': 2.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				     # Mosaic & Mixup
			
 
				-    'mosaic_prob': 1.0,
			
 
				-    'mixup_prob': 1.0,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolox_mixup',
			
 
				     'mosaic_keep_ratio': True,
			
 
				-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
			
 
				+    'mosaic_prob': 1.0,
			
 
				+    'mixup_prob':  1.0,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolox',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 yolox_s_trans_config = {
			
 
				     'aug_type': 'yolov5',
			
 
				-    # Basic Augment
			
 
				-    'degrees': 10.0,
			
 
				-    'translate': 0.1,
			
 
				-    'scale': [0.1, 2.0],
			
 
				-    'shear': 2.0,
			
 
				-    'perspective': 0.0,
			
 
				-    'hsv_h': 0.015,
			
 
				-    'hsv_s': 0.7,
			
 
				-    'hsv_v': 0.4,
			
 
				     'use_ablu': False,
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 10.0,
			
 
				+        'translate': 0.1,
			
 
				+        'scale': [0.1, 2.0],
			
 
				+        'shear': 2.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				     # Mosaic & Mixup
			
 
				-    'mosaic_prob': 1.0,
			
 
				-    'mixup_prob': 1.0,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolox_mixup',
			
 
				     'mosaic_keep_ratio': True,
			
 
				-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
			
 
				+    'mosaic_prob': 1.0,
			
 
				+    'mixup_prob':  1.0,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolox',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 yolox_n_trans_config = {
			
 
				     'aug_type': 'yolov5',
			
 
				-    # Basic Augment
			
 
				-    'degrees': 10.0,
			
 
				-    'translate': 0.1,
			
 
				-    'scale': [0.5, 1.5],
			
 
				-    'shear': 2.0,
			
 
				-    'perspective': 0.0,
			
 
				-    'hsv_h': 0.015,
			
 
				-    'hsv_s': 0.7,
			
 
				-    'hsv_v': 0.4,
			
 
				     'use_ablu': False,
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 10.0,
			
 
				+        'translate': 0.1,
			
 
				+        'scale': [0.1, 2.0],
			
 
				+        'shear': 2.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				     # Mosaic & Mixup
			
 
				-    'mosaic_prob': 1.0,
			
 
				-    'mixup_prob': 0.5,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolox_mixup',
			
 
				     'mosaic_keep_ratio': True,
			
 
				-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
			
 
				+    'mosaic_prob': 1.0,
			
 
				+    'mixup_prob':  0.5,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolox',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 yolox_p_trans_config = {
			
 
				     'aug_type': 'yolov5',
			
 
				-    # Basic Augment
			
 
				-    'degrees': 10.0,
			
 
				-    'translate': 0.1,
			
 
				-    'scale': [0.5, 1.5],
			
 
				-    'shear': 2.0,
			
 
				-    'perspective': 0.0,
			
 
				-    'hsv_h': 0.015,
			
 
				-    'hsv_s': 0.7,
			
 
				-    'hsv_v': 0.4,
			
 
				     'use_ablu': False,
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 10.0,
			
 
				+        'translate': 0.1,
			
 
				+        'scale': [0.1, 2.0],
			
 
				+        'shear': 2.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				     # Mosaic & Mixup
			
 
				-    'mosaic_prob': 0.5,
			
 
				-    'mixup_prob': 0.0,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolox_mixup',
			
 
				     'mosaic_keep_ratio': True,
			
 
				-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
			
 
				+    'mosaic_prob': 0.5,
			
 
				+    'mixup_prob':  0.0,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolox',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 
			
@@ -262,54 +286,62 @@ ssd_trans_config = {
 
				     'aug_type': 'ssd',
			
 
				     'use_ablu': False,
			
 
				     # Mosaic & Mixup are not used for SSD-style augmentation
			
 
				-    'mosaic_prob': 0.,
			
 
				-    'mixup_prob': 0.,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolov5_mixup',
			
 
				     'mosaic_keep_ratio': False,
			
 
				-    'mixup_scale': [0.5, 1.5]
			
 
				+    'mosaic_prob': 0.0,
			
 
				+    'mixup_prob':  0.0,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolov5',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 
			
 
				 # ----------------------- RT-DETR-Style Transform -----------------------
			
 
				-rtdetr_base_trans_config = {
			
 
				+rtdetr_s_trans_config = {
			
 
				     'aug_type': 'rtdetr',
			
 
				-    'use_ablu': False,
			
 
				+    'use_ablu': True,
			
 
				     'pixel_mean': [123.675, 116.28, 103.53],  # IN-1K statistics
			
 
				     'pixel_std':  [58.395, 57.12, 57.375],    # IN-1K statistics
			
 
				-    # Mosaic & Mixup are not used for RT_DETR-style augmentation
			
 
				-    'mosaic_prob': 0.,
			
 
				-    'mixup_prob': 0.,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolov5_mixup',
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 0.0,
			
 
				+        'translate': 0.2,
			
 
				+        'scale': [0.1, 2.0],
			
 
				+        'shear': 0.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				+    # Mosaic & Mixup
			
 
				     'mosaic_keep_ratio': False,
			
 
				-    'mixup_scale': [0.5, 1.5]
			
 
				+    'mosaic_prob': 1.0,
			
 
				+    'mixup_prob':  0.0,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolov5',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
 
				 
			
 
				 rtdetr_l_trans_config = {
			
 
				     'aug_type': 'rtdetr',
			
 
				-    'use_ablu': False,
			
 
				-    'pixel_mean': [0., 0., 0.],
			
 
				-    'pixel_std':  [255., 255., 255.],
			
 
				-    # Mosaic & Mixup are not used for RT_DETR-style augmentation
			
 
				-    'mosaic_prob': 0.,
			
 
				-    'mixup_prob': 0.,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolov5_mixup',
			
 
				-    'mosaic_keep_ratio': False,
			
 
				-    'mixup_scale': [0.5, 1.5]
			
 
				-}
			
 
				-
			
 
				-rtdetr_x_trans_config = {
			
 
				-    'aug_type': 'rtdetr',
			
 
				-    'use_ablu': False,
			
 
				-    'pixel_mean': [0., 0., 0.],
			
 
				-    'pixel_std':  [255., 255., 255.],
			
 
				-    # Mosaic & Mixup are not used for RT_DETR-style augmentation
			
 
				-    'mosaic_prob': 0.,
			
 
				-    'mixup_prob': 0.,
			
 
				-    'mosaic_type': 'yolov5_mosaic',
			
 
				-    'mixup_type': 'yolov5_mixup',
			
 
				+    'use_ablu': True,
			
 
				+    'pixel_mean': [123.675, 116.28, 103.53],  # IN-1K statistics
			
 
				+    'pixel_std':  [58.395, 57.12, 57.375],    # IN-1K statistics
			
 
				+    # Basic Augment
			
 
				+    'affine_params': {
			
 
				+        'degrees': 0.0,
			
 
				+        'translate': 0.2,
			
 
				+        'scale': [0.1, 2.0],
			
 
				+        'shear': 0.0,
			
 
				+        'perspective': 0.0,
			
 
				+        'hsv_h': 0.015,
			
 
				+        'hsv_s': 0.7,
			
 
				+        'hsv_v': 0.4,
			
 
				+    },
			
 
				+    # Mosaic & Mixup
			
 
				     'mosaic_keep_ratio': False,
			
 
				-    'mixup_scale': [0.5, 1.5]
			
 
				+    'mosaic_prob': 1.0,
			
 
				+    'mixup_prob':  0.15,
			
 
				+    'mosaic_type': 'yolov5',
			
 
				+    'mixup_type':  'yolov5',
			
 
				+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
			
 
				 }
			
--- a/config/model_config/rtdetr_config.py
+++ b/config/model_config/rtdetr_config.py
@@ -54,7 +54,7 @@ rtdetr_cfg = {
 
				         # ---------------- Train config ----------------
			
 
				         ## input
			
 
				         'multi_scale': [0.5, 1.25],   # 320 -> 800
			
 
				-        'trans_type': 'rtdetr_base',
			
 
				+        'trans_type': 'rtdetr_s',
			
 
				         # ---------------- Train config ----------------
			
 
				         'trainer_type': 'rtdetr',
			
 
				     },
			
@@ -112,7 +112,7 @@ rtdetr_cfg = {
 
				         # ---------------- Train config ----------------
			
 
				         ## input
			
 
				         'multi_scale': [0.5, 1.25],   # 320 -> 800
			
 
				-        'trans_type': 'rtdetr_base',
			
 
				+        'trans_type': 'rtdetr_l',
			
 
				         # ---------------- Train config ----------------
			
 
				         'trainer_type': 'rtdetr',
			
 
				     },
			
--- a/dataset/build.py
+++ b/dataset/build.py
@@ -111,23 +111,24 @@ def build_transform(args, trans_config, max_stride=32, is_train=False):
 
				     ## SSD style transform
			
 
				     if trans_config['aug_type'] == 'ssd':
			
 
				         if is_train:
			
 
				-            transform = SSDAugmentation(img_size=args.img_size,)
			
 
				+            transform = SSDAugmentation(args.img_size)
			
 
				         else:
			
 
				-            transform = SSDBaseTransform(img_size=args.img_size,)
			
 
				+            transform = SSDBaseTransform(args.img_size)
			
 
				     ## YOLO style transform
			
 
				     elif trans_config['aug_type'] == 'yolov5':
			
 
				         if is_train:
			
 
				-            transform = YOLOv5Augmentation(img_size=args.img_size, trans_config=trans_config, use_ablu=trans_config['use_ablu'])
			
 
				+            transform = YOLOv5Augmentation(args.img_size, trans_config['affine_params'], trans_config['use_ablu'])
			
 
				         else:
			
 
				-            transform = YOLOv5BaseTransform(img_size=args.img_size,max_stride=max_stride)
			
 
				+            transform = YOLOv5BaseTransform(args.img_size, max_stride)
			
 
				     ## RT-DETR style transform
			
 
				     elif trans_config['aug_type'] == 'rtdetr':
			
 
				         if is_train:
			
 
				-            use_mosaic = False if trans_config['mosaic_prob'] < 0.2 else True
			
 
				             transform = RTDetrAugmentation(
			
 
				-                img_size=args.img_size, pixel_mean=trans_config['pixel_mean'], pixel_std=trans_config['pixel_std'], use_mosaic=use_mosaic)
			
 
				+                args.img_size, trans_config['pixel_mean'], trans_config['pixel_std'])
			
 
				+            if trans_config["mosaic_prob"] > 0:
			
 
				+                transform.reset_weak_augment()
			
 
				         else:
			
 
				             transform = RTDetrBaseTransform(
			
 
				-                img_size=args.img_size, pixel_mean=trans_config['pixel_mean'], pixel_std=trans_config['pixel_std'])
			
 
				+                args.img_size, trans_config['pixel_mean'], trans_config['pixel_std'])
			
 
				 
			
 
				     return transform, trans_config
			
--- a/dataset/coco.py
+++ b/dataset/coco.py
@@ -3,8 +3,6 @@ import cv2
 
				 import time
			
 
				 import random
			
 
				 import numpy as np
			
 
				-
			
 
				-import torch
			
 
				 from torch.utils.data import Dataset
			
 
				 
			
 
				 try:
			
@@ -13,9 +11,9 @@ except:
 
				     print("It seems that the COCOAPI is not installed.")
			
 
				 
			
 
				 try:
			
 
				-    from .data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
			
 
				+    from .data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				 except:
			
 
				-    from data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
			
 
				+    from  data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				 
			
 
				 
			
 
				 coco_class_index = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
			
@@ -52,10 +50,19 @@ class COCODataset(Dataset):
 
				         self.class_ids = sorted(self.coco.getCatIds())
			
 
				         self.dataset_size = len(self.ids)
			
 
				         # ----------- Transform parameters -----------
			
 
				-        self.transform = transform
			
 
				-        self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
			
 
				-        self.mixup_prob = trans_config['mixup_prob'] if trans_config else 0.0
			
 
				         self.trans_config = trans_config
			
 
				+        self.transform = transform
			
 
				+        # ----------- Strong augmentation -----------
			
 
				+        if is_train:
			
 
				+            self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
			
 
				+            self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
			
 
				+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
			
 
				+            self.mixup_augment  = MixupAugment(img_size, trans_config)
			
 
				+        else:
			
 
				+            self.mosaic_prob = 0.0
			
 
				+            self.mixup_prob  = 0.0
			
 
				+            self.mosaic_augment = None
			
 
				+            self.mixup_augment  = None
			
 
				         print('==============================')
			
 
				         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
			
 
				         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
			
@@ -66,7 +73,6 @@ class COCODataset(Dataset):
 
				         if self.load_cache:
			
 
				             self.cached_datas = self._load_cache()
			
 
				 
			
 
				-
			
 
				     # ------------ Basic dataset function ------------
			
 
				     def __len__(self):
			
 
				         return len(self.ids)
			
@@ -108,13 +114,14 @@ class COCODataset(Dataset):
 
				 
			
 
				     # ------------ Mosaic & Mixup ------------
			
 
				     def load_mosaic(self, index):
			
 
				-        # load 4x mosaic image
			
 
				+        # ------------ Prepare 4 indexes of images ------------
			
 
				+        ## Randomly pick 3 other image indexes to combine with the current one (4 images per mosaic)
			
 
				         index_list = np.arange(index).tolist() + np.arange(index+1, len(self.ids)).tolist()
			
 
				         id1 = index
			
 
				         id2, id3, id4 = random.sample(index_list, 3)
			
 
				         indexs = [id1, id2, id3, id4]
			
 
				 
			
 
				-        # load images and targets
			
 
				+        ## Load images and targets
			
 
				         image_list = []
			
 
				         target_list = []
			
 
				         for index in indexs:
			
@@ -122,26 +129,22 @@ class COCODataset(Dataset):
 
				             image_list.append(img_i)
			
 
				             target_list.append(target_i)
			
 
				 
			
 
				-        # Mosaic
			
 
				-        if self.trans_config['mosaic_type'] == 'yolov5_mosaic':
			
 
				-            image, target = yolov5_mosaic_augment(
			
 
				-                image_list, target_list, self.img_size, self.trans_config, self.trans_config['mosaic_keep_ratio'], self.is_train)
			
 
				+        # ------------ Mosaic augmentation ------------
			
 
				+        image, target = self.mosaic_augment(image_list, target_list)
			
 
				 
			
 
				         return image, target
			
 
				 
			
 
				     def load_mixup(self, origin_image, origin_target):
			
 
				-        # YOLOv5 type Mixup
			
 
				-        if self.trans_config['mixup_type'] == 'yolov5_mixup':
			
 
				+        # ------------ Load a new image & target ------------
			
 
				+        if self.mixup_augment.mixup_type == 'yolov5':
			
 
				             new_index = np.random.randint(0, len(self.ids))
			
 
				             new_image, new_target = self.load_mosaic(new_index)
			
 
				-            image, target = yolov5_mixup_augment(
			
 
				-                origin_image, origin_target, new_image, new_target)
			
 
				-        # YOLOX type Mixup
			
 
				-        elif self.trans_config['mixup_type'] == 'yolox_mixup':
			
 
				+        elif self.mixup_augment.mixup_type == 'yolox':
			
 
				             new_index = np.random.randint(0, len(self.ids))
			
 
				             new_image, new_target = self.load_image_target(new_index)
			
 
				-            image, target = yolox_mixup_augment(
			
 
				-                origin_image, origin_target, new_image, new_target, self.img_size, self.trans_config['mixup_scale'])
			
 
				+            
			
 
				+        # ------------ Mixup augmentation ------------
			
 
				+        image, target = self.mixup_augment(origin_image, origin_target, new_image, new_target)
			
 
				 
			
 
				         return image, target
			
 
				     
			
--- a/dataset/crowdhuman.py
+++ b/dataset/crowdhuman.py
@@ -11,9 +11,9 @@ except:
 
				     print("It seems that the COCOAPI is not installed.")
			
 
				 
			
 
				 try:
			
 
				-    from .data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
			
 
				+    from .data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				 except:
			
 
				-    from data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
			
 
				+    from  data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				 
			
 
				 
			
 
				 crowd_class_labels = ('person',)
			
@@ -40,12 +40,20 @@ class CrowdHumanDataset(Dataset):
 
				         self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file))
			
 
				         self.ids = self.coco.getImgIds()
			
 
				         self.class_ids = sorted(self.coco.getCatIds())
			
 
				-
			
 
				         # ----------- Transform parameters -----------
			
 
				-        self.transform = transform
			
 
				-        self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
			
 
				-        self.mixup_prob = trans_config['mixup_prob'] if trans_config else 0.0
			
 
				         self.trans_config = trans_config
			
 
				+        self.transform = transform
			
 
				+        # ----------- Strong augmentation -----------
			
 
				+        if is_train:
			
 
				+            self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
			
 
				+            self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
			
 
				+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
			
 
				+            self.mixup_augment  = MixupAugment(img_size, trans_config)
			
 
				+        else:
			
 
				+            self.mosaic_prob = 0.0
			
 
				+            self.mixup_prob  = 0.0
			
 
				+            self.mosaic_augment = None
			
 
				+            self.mixup_augment  = None
			
 
				         print('==============================')
			
 
				         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
			
 
				         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
			
@@ -60,13 +68,14 @@ class CrowdHumanDataset(Dataset):
 
				 
			
 
				     # ------------ Mosaic & Mixup ------------
			
 
				     def load_mosaic(self, index):
			
 
				-        # load 4x mosaic image
			
 
				+        # ------------ Prepare 4 indexes of images ------------
			
 
				+        ## Load 4x mosaic image
			
 
				         index_list = np.arange(index).tolist() + np.arange(index+1, len(self.ids)).tolist()
			
 
				         id1 = index
			
 
				         id2, id3, id4 = random.sample(index_list, 3)
			
 
				         indexs = [id1, id2, id3, id4]
			
 
				 
			
 
				-        # load images and targets
			
 
				+        ## Load images and targets
			
 
				         image_list = []
			
 
				         target_list = []
			
 
				         for index in indexs:
			
@@ -74,26 +83,22 @@ class CrowdHumanDataset(Dataset):
 
				             image_list.append(img_i)
			
 
				             target_list.append(target_i)
			
 
				 
			
 
				-        # Mosaic
			
 
				-        if self.trans_config['mosaic_type'] == 'yolov5_mosaic':
			
 
				-            image, target = yolov5_mosaic_augment(
			
 
				-                image_list, target_list, self.img_size, self.trans_config, self.trans_config['mosaic_keep_ratio'], self.is_train)
			
 
				+        # ------------ Mosaic augmentation ------------
			
 
				+        image, target = self.mosaic_augment(image_list, target_list)
			
 
				 
			
 
				         return image, target
			
 
				 
			
 
				     def load_mixup(self, origin_image, origin_target):
			
 
				-        # YOLOv5 type Mixup
			
 
				-        if self.trans_config['mixup_type'] == 'yolov5_mixup':
			
 
				+        # ------------ Load a new image & target ------------
			
 
				+        if self.mixup_augment.mixup_type == 'yolov5':
			
 
				             new_index = np.random.randint(0, len(self.ids))
			
 
				             new_image, new_target = self.load_mosaic(new_index)
			
 
				-            image, target = yolov5_mixup_augment(
			
 
				-                origin_image, origin_target, new_image, new_target)
			
 
				-        # YOLOX type Mixup
			
 
				-        elif self.trans_config['mixup_type'] == 'yolox_mixup':
			
 
				+        elif self.mixup_augment.mixup_type == 'yolox':
			
 
				             new_index = np.random.randint(0, len(self.ids))
			
 
				             new_image, new_target = self.load_image_target(new_index)
			
 
				-            image, target = yolox_mixup_augment(
			
 
				-                origin_image, origin_target, new_image, new_target, self.img_size, self.trans_config['mixup_scale'])
			
 
				+            
			
 
				+        # ------------ Mixup augmentation ------------
			
 
				+        image, target = self.mixup_augment(origin_image, origin_target, new_image, new_target)
			
 
				 
			
 
				         return image, target
			
 
				     
			
--- a/dataset/customed.py
+++ b/dataset/customed.py
@@ -4,7 +4,6 @@ import time
 
				 import random
			
 
				 import numpy as np
			
 
				 
			
 
				-import torch
			
 
				 from torch.utils.data import Dataset
			
 
				 
			
 
				 try:
			
@@ -13,9 +12,9 @@ except:
 
				     print("It seems that the COCOAPI is not installed.")
			
 
				 
			
 
				 try:
			
 
				-    from .data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
			
 
				+    from .data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				 except:
			
 
				-    from data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
			
 
				+    from  data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				 
			
 
				 
			
 
				 class CustomedDataset(Dataset):
			
@@ -41,14 +40,19 @@ class CustomedDataset(Dataset):
 
				         self.class_ids = sorted(self.coco.getCatIds())
			
 
				         self.dataset_size = len(self.ids)
			
 
				         # ----------- Transform parameters -----------
			
 
				-        self.transform = transform
			
 
				-        self.mosaic_prob = 0
			
 
				-        self.mixup_prob = 0
			
 
				         self.trans_config = trans_config
			
 
				-        if trans_config is not None:
			
 
				-            self.mosaic_prob = trans_config['mosaic_prob']
			
 
				-            self.mixup_prob = trans_config['mixup_prob']
			
 
				-
			
 
				+        self.transform = transform
			
 
				+        # ----------- Strong augmentation -----------
			
 
				+        if is_train:
			
 
				+            self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
			
 
				+            self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
			
 
				+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
			
 
				+            self.mixup_augment  = MixupAugment(img_size, trans_config)
			
 
				+        else:
			
 
				+            self.mosaic_prob = 0.0
			
 
				+            self.mixup_prob  = 0.0
			
 
				+            self.mosaic_augment = None
			
 
				+            self.mixup_augment  = None
			
 
				         print('==============================')
			
 
				         print('Image Set: {}'.format(image_set))
			
 
				         print('Json file: {}'.format(self.json_file))
			
@@ -103,13 +107,14 @@ class CustomedDataset(Dataset):
 
				 
			
 
				     # ------------ Mosaic & Mixup ------------
			
 
				     def load_mosaic(self, index):
			
 
				-        # load 4x mosaic image
			
 
				+        # ------------ Prepare 4 indexes of images ------------
			
 
				+        ## Load 4x mosaic image
			
 
				         index_list = np.arange(index).tolist() + np.arange(index+1, len(self.ids)).tolist()
			
 
				         id1 = index
			
 
				         id2, id3, id4 = random.sample(index_list, 3)
			
 
				         indexs = [id1, id2, id3, id4]
			
 
				 
			
 
				-        # load images and targets
			
 
				+        ## Load images and targets
			
 
				         image_list = []
			
 
				         target_list = []
			
 
				         for index in indexs:
			
@@ -117,26 +122,22 @@ class CustomedDataset(Dataset):
 
				             image_list.append(img_i)
			
 
				             target_list.append(target_i)
			
 
				 
			
 
				-        # Mosaic
			
 
				-        if self.trans_config['mosaic_type'] == 'yolov5_mosaic':
			
 
				-            image, target = yolov5_mosaic_augment(
			
 
				-                image_list, target_list, self.img_size, self.trans_config, self.trans_config['mosaic_keep_ratio'], self.is_train)
			
 
				+        # ------------ Mosaic augmentation ------------
			
 
				+        image, target = self.mosaic_augment(image_list, target_list)
			
 
				 
			
 
				         return image, target
			
 
				 
			
 
				     def load_mixup(self, origin_image, origin_target):
			
 
				-        # YOLOv5 type Mixup
			
 
				-        if self.trans_config['mixup_type'] == 'yolov5_mixup':
			
 
				+        # ------------ Load a new image & target ------------
			
 
				+        if self.mixup_augment.mixup_type == 'yolov5':
			
 
				             new_index = np.random.randint(0, len(self.ids))
			
 
				             new_image, new_target = self.load_mosaic(new_index)
			
 
				-            image, target = yolov5_mixup_augment(
			
 
				-                origin_image, origin_target, new_image, new_target)
			
 
				-        # YOLOX type Mixup
			
 
				-        elif self.trans_config['mixup_type'] == 'yolox_mixup':
			
 
				+        elif self.mixup_augment.mixup_type == 'yolox':
			
 
				             new_index = np.random.randint(0, len(self.ids))
			
 
				             new_image, new_target = self.load_image_target(new_index)
			
 
				-            image, target = yolox_mixup_augment(
			
 
				-                origin_image, origin_target, new_image, new_target, self.img_size, self.trans_config['mixup_scale'])
			
 
				+            
			
 
				+        # ------------ Mixup augmentation ------------
			
 
				+        image, target = self.mixup_augment(origin_image, origin_target, new_image, new_target)
			
 
				 
			
 
				         return image, target
			
 
				     
			
--- a/dataset/data_augment/rtdetr_augment.py
+++ b/dataset/data_augment/rtdetr_augment.py
@@ -401,10 +401,9 @@ class ToTensor(object):
 
				 # ------------------------- Preprocessers -------------------------
			
 
				 ## Transform for Train
			
 
				 class RTDetrAugmentation(object):
			
 
				-    def __init__(self, img_size=640, pixel_mean=[123.675, 116.28, 103.53], pixel_std=[58.395, 57.12, 57.375], use_mosaic=False):
			
 
				+    def __init__(self, img_size=640, pixel_mean=[123.675, 116.28, 103.53], pixel_std=[58.395, 57.12, 57.375]):
			
 
				         # ----------------- Basic parameters -----------------
			
 
				         self.img_size = img_size
			
 
				-        self.use_mosaic = use_mosaic
			
 
				         self.pixel_mean = pixel_mean  # RGB format
			
 
				         self.pixel_std = pixel_std    # RGB format
			
 
				         self.color_format = 'rgb'
			
@@ -413,29 +412,18 @@ class RTDetrAugmentation(object):
 
				         print("Pixel std:  {}".format(self.pixel_std))
			
 
				 
			
 
				         # ----------------- Transforms -----------------
			
 
				-        if use_mosaic:
			
 
				-            # For use-mosaic setting, we do not use RandomSampleCrop processor.
			
 
				-            self.augment = Compose([
			
 
				-                RandomPhotometricDistort(hue=0.5, saturation=1.5, exposure=1.5),
			
 
				-                RandomHorizontalFlip(p=0.5),
			
 
				-                Resize(img_size=self.img_size),
			
 
				-                ConvertColorFormat(self.color_format),
			
 
				-                Normalize(self.pixel_mean, self.pixel_std),
			
 
				-                ToTensor()
			
 
				-            ])
			
 
				-        else:
			
 
				-            # For no-mosaic setting, we use RandomExpand & RandomSampleCrop processor.
			
 
				-            self.augment = Compose([
			
 
				-                RandomPhotometricDistort(hue=0.5, saturation=1.5, exposure=1.5),
			
 
				-                RandomJitterCrop(p=0.8, jitter_ratio=0.3, fill_value=self.pixel_mean[::-1]),
			
 
				-                RandomHorizontalFlip(p=0.5),
			
 
				-                Resize(img_size=self.img_size),
			
 
				-                ConvertColorFormat(self.color_format),
			
 
				-                Normalize(self.pixel_mean, self.pixel_std),
			
 
				-                ToTensor()
			
 
				-            ])
			
 
				-
			
 
				-    def set_weak_augment(self):
			
 
				+        self.augment = Compose([
			
 
				+            RandomPhotometricDistort(hue=0.5, saturation=1.5, exposure=1.5),
			
 
				+            RandomJitterCrop(p=0.8, jitter_ratio=0.3, fill_value=self.pixel_mean[::-1]),
			
 
				+            RandomHorizontalFlip(p=0.5),
			
 
				+            Resize(img_size=self.img_size),
			
 
				+            ConvertColorFormat(self.color_format),
			
 
				+            Normalize(self.pixel_mean, self.pixel_std),
			
 
				+            ToTensor()
			
 
				+        ])
			
 
				+
			
 
				+    def reset_weak_augment(self):
			
 
				+        print("Reset transform with weak augmentation ...")
			
 
				         self.augment = Compose([
			
 
				             RandomHorizontalFlip(p=0.5),
			
 
				             Resize(img_size=self.img_size),
			
@@ -444,6 +432,7 @@ class RTDetrAugmentation(object):
 
				             ToTensor()
			
 
				         ])
			
 
				 
			
 
				+
			
 
				     def __call__(self, image, target, mosaic=False):
			
 
				         orig_h, orig_w = image.shape[:2]
			
 
				         ratio = [self.img_size / orig_w, self.img_size / orig_h]
			
@@ -452,7 +441,6 @@ class RTDetrAugmentation(object):
 
				 
			
 
				         return image, target, ratio
			
 
				 
			
 
				-
			
 
				 ## Transform for Eval
			
 
				 class RTDetrBaseTransform(object):
			
 
				     def __init__(self, img_size=640, pixel_mean=[123.675, 116.28, 103.53], pixel_std=[58.395, 57.12, 57.375]):
			
--- a/dataset/data_augment/strong_augment.py
+++ b/dataset/data_augment/strong_augment.py
@@ -0,0 +1,250 @@
 
				+import random
			
 
				+import cv2
			
 
				+import numpy as np
			
 
				+
			
 
				+from .yolov5_augment import random_perspective
			
 
				+
			
 
				+
			
 
				+# ------------------------- Strong augmentations -------------------------
			
 
				+## Mosaic Augmentation
			
 
				+class MosaicAugment(object):
			
 
				+    def __init__(self,
			
 
				+                 img_size,
			
 
				+                 transform_config,
			
 
				+                 is_train=False,
			
 
				+                 ) -> None:
			
 
				+        self.img_size = img_size
			
 
				+        self.is_train = is_train
			
 
				+        self.keep_ratio    = transform_config['mosaic_keep_ratio']
			
 
				+        self.affine_params = transform_config['affine_params']
			
 
				+        self.mosaic_type   = transform_config['mosaic_type']
			
 
				+
			
 
				+    def yolov5_mosaic_augment(self, image_list, target_list):
			
 
				+        assert len(image_list) == 4
			
 
				+
			
 
				+        mosaic_img = np.ones([self.img_size*2, self.img_size*2, image_list[0].shape[2]], dtype=np.uint8) * 114
			
 
				+        # mosaic center
			
 
				+        yc, xc = [int(random.uniform(-x, 2*self.img_size + x)) for x in [-self.img_size // 2, -self.img_size // 2]]
			
 
				+        # yc = xc = self.img_size
			
 
				+
			
 
				+        mosaic_bboxes = []
			
 
				+        mosaic_labels = []
			
 
				+        for i in range(4):
			
 
				+            img_i, target_i = image_list[i], target_list[i]
			
 
				+            bboxes_i = target_i["boxes"]
			
 
				+            labels_i = target_i["labels"]
			
 
				+
			
 
				+            orig_h, orig_w, _ = img_i.shape
			
 
				+
			
 
				+            # resize
			
 
				+            if self.keep_ratio:
			
 
				+                r = self.img_size / max(orig_h, orig_w)
			
 
				+                if r != 1: 
			
 
				+                    interp = cv2.INTER_LINEAR if (self.is_train or r > 1) else cv2.INTER_AREA
			
 
				+                    img_i = cv2.resize(img_i, (int(orig_w * r), int(orig_h * r)), interpolation=interp)
			
 
				+            else:
			
 
				+                interp = cv2.INTER_LINEAR if self.is_train else cv2.INTER_AREA
			
 
				+                img_i = cv2.resize(img_i, (self.img_size, self.img_size), interpolation=interp)
			
 
				+            h, w, _ = img_i.shape
			
 
				+
			
 
				+            # place img in mosaic_img
			
 
				+            if i == 0:  # top left
			
 
				+                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
			
 
				+                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
			
 
				+            elif i == 1:  # top right
			
 
				+                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, self.img_size * 2), yc
			
 
				+                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
			
 
				+            elif i == 2:  # bottom left
			
 
				+                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(self.img_size * 2, yc + h)
			
 
				+                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
			
 
				+            elif i == 3:  # bottom right
			
 
				+                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, self.img_size * 2), min(self.img_size * 2, yc + h)
			
 
				+                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
			
 
				+
			
 
				+            mosaic_img[y1a:y2a, x1a:x2a] = img_i[y1b:y2b, x1b:x2b]
			
 
				+            padw = x1a - x1b
			
 
				+            padh = y1a - y1b
			
 
				+
			
 
				+            # labels
			
 
				+            bboxes_i_ = bboxes_i.copy()
			
 
				+            if len(bboxes_i) > 0:
			
 
				+                # a valid target, and modify it.
			
 
				+                bboxes_i_[:, 0] = (w * bboxes_i[:, 0] / orig_w + padw)
			
 
				+                bboxes_i_[:, 1] = (h * bboxes_i[:, 1] / orig_h + padh)
			
 
				+                bboxes_i_[:, 2] = (w * bboxes_i[:, 2] / orig_w + padw)
			
 
				+                bboxes_i_[:, 3] = (h * bboxes_i[:, 3] / orig_h + padh)    
			
 
				+
			
 
				+                mosaic_bboxes.append(bboxes_i_)
			
 
				+                mosaic_labels.append(labels_i)
			
 
				+
			
 
				+        if len(mosaic_bboxes) == 0:
			
 
				+            mosaic_bboxes = np.array([]).reshape(-1, 4)
			
 
				+            mosaic_labels = np.array([]).reshape(-1)
			
 
				+        else:
			
 
				+            mosaic_bboxes = np.concatenate(mosaic_bboxes)
			
 
				+            mosaic_labels = np.concatenate(mosaic_labels)
			
 
				+
			
 
				+        # clip
			
 
				+        mosaic_bboxes = mosaic_bboxes.clip(0, self.img_size * 2)
			
 
				+
			
 
				+        # random perspective
			
 
				+        mosaic_targets = np.concatenate([mosaic_labels[..., None], mosaic_bboxes], axis=-1)
			
 
				+        mosaic_img, mosaic_targets = random_perspective(
			
 
				+            mosaic_img,
			
 
				+            mosaic_targets,
			
 
				+            self.affine_params['degrees'],
			
 
				+            translate=self.affine_params['translate'],
			
 
				+            scale=self.affine_params['scale'],
			
 
				+            shear=self.affine_params['shear'],
			
 
				+            perspective=self.affine_params['perspective'],
			
 
				+            border=[-self.img_size//2, -self.img_size//2]
			
 
				+            )
			
 
				+
			
 
				+        # target
			
 
				+        mosaic_target = {
			
 
				+            "boxes": mosaic_targets[..., 1:],
			
 
				+            "labels": mosaic_targets[..., 0],
			
 
				+            "orig_size": [self.img_size, self.img_size]
			
 
				+        }
			
 
				+
			
 
				+        return mosaic_img, mosaic_target
			
 
				+
			
 
				+    def __call__(self, image_list, target_list):
			
 
				+        if self.mosaic_type == 'yolov5':
			
 
				+            return self.yolov5_mosaic_augment(image_list, target_list)
			
 
				+        else:
			
 
				+            raise NotImplementedError("Unknown mosaic type: {}".format(self.mosaic_type))
			
 
				+
			
 
				+## Mixup Augmentation
			
 
				+class MixupAugment(object):
			
 
				+    def __init__(self,
			
 
				+                 img_size,
			
 
				+                 transform_config,
			
 
				+                 ) -> None:
			
 
				+        self.img_size = img_size
			
 
				+        self.mixup_type  = transform_config['mixup_type']
			
 
				+        self.mixup_scale = transform_config['mixup_scale']
			
 
				+
			
 
				+    def yolov5_mixup_augment(self, origin_image, origin_target, new_image, new_target):
			
 
				+        if origin_image.shape[:2] != new_image.shape[:2]:
			
 
				+            img_size = max(new_image.shape[:2])
			
 
				+            # origin_image is not a mosaic image
			
 
				+            orig_h, orig_w = origin_image.shape[:2]
			
 
				+            scale_ratio = img_size / max(orig_h, orig_w)
			
 
				+            if scale_ratio != 1: 
			
 
				+                interp = cv2.INTER_LINEAR if scale_ratio > 1 else cv2.INTER_AREA
			
 
				+                resize_size = (int(orig_w * scale_ratio), int(orig_h * scale_ratio))
			
 
				+                origin_image = cv2.resize(origin_image, resize_size, interpolation=interp)
			
 
				+
			
 
				+            # pad origin image
			
 
				+            pad_origin_image = np.ones([img_size, img_size, origin_image.shape[2]], dtype=np.uint8) * 114
			
 
				+            pad_origin_image[:resize_size[1], :resize_size[0]] = origin_image
			
 
				+            origin_image = pad_origin_image.copy()
			
 
				+            del pad_origin_image
			
 
				+
			
 
				+        r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
			
 
				+        mixup_image = r * origin_image.astype(np.float32) + \
			
 
				+                    (1.0 - r)* new_image.astype(np.float32)
			
 
				+        mixup_image = mixup_image.astype(np.uint8)
			
 
				+        
			
 
				+        cls_labels = new_target["labels"].copy()
			
 
				+        box_labels = new_target["boxes"].copy()
			
 
				+
			
 
				+        mixup_bboxes = np.concatenate([origin_target["boxes"], box_labels], axis=0)
			
 
				+        mixup_labels = np.concatenate([origin_target["labels"], cls_labels], axis=0)
			
 
				+
			
 
				+        mixup_target = {
			
 
				+            "boxes": mixup_bboxes,
			
 
				+            "labels": mixup_labels,
			
 
				+            'orig_size': mixup_image.shape[:2]
			
 
				+        }
			
 
				+        
			
 
				+        return mixup_image, mixup_target
			
 
				+
			
 
				+    def yolox_mixup_augment(self, origin_image, origin_target, new_image, new_target):
			
 
				+        assert self.mixup_scale is not None, "You should set mixup_scale as a List type, such as [0.5, 1.5], not a NoneType."
			
 
				+
			
 
				+        jit_factor = random.uniform(*self.mixup_scale)
			
 
				+        FLIP = random.uniform(0, 1) > 0.5
			
 
				+
			
 
				+        # resize new image
			
 
				+        orig_h, orig_w = new_image.shape[:2]
			
 
				+        cp_scale_ratio = self.img_size / max(orig_h, orig_w)
			
 
				+        if cp_scale_ratio != 1: 
			
 
				+            interp = cv2.INTER_LINEAR if cp_scale_ratio > 1 else cv2.INTER_AREA
			
 
				+            resized_new_img = cv2.resize(
			
 
				+                new_image, (int(orig_w * cp_scale_ratio), int(orig_h * cp_scale_ratio)), interpolation=interp)
			
 
				+        else:
			
 
				+            resized_new_img = new_image
			
 
				+
			
 
				+        # pad new image
			
 
				+        cp_img = np.ones([self.img_size, self.img_size, new_image.shape[2]], dtype=np.uint8) * 114
			
 
				+        new_shape = (resized_new_img.shape[1], resized_new_img.shape[0])
			
 
				+        cp_img[:new_shape[1], :new_shape[0]] = resized_new_img
			
 
				+
			
 
				+        # resize padded new image
			
 
				+        cp_img_h, cp_img_w = cp_img.shape[:2]
			
 
				+        cp_new_shape = (int(cp_img_w * jit_factor),
			
 
				+                        int(cp_img_h * jit_factor))
			
 
				+        cp_img = cv2.resize(cp_img, (cp_new_shape[0], cp_new_shape[1]))
			
 
				+        cp_scale_ratio *= jit_factor
			
 
				+
			
 
				+        # flip new image
			
 
				+        if FLIP:
			
 
				+            cp_img = cp_img[:, ::-1, :]
			
 
				+
			
 
				+        # pad image
			
 
				+        origin_h, origin_w = cp_img.shape[:2]
			
 
				+        target_h, target_w = origin_image.shape[:2]
			
 
				+        padded_img = np.zeros(
			
 
				+            (max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8
			
 
				+        )
			
 
				+        padded_img[:origin_h, :origin_w] = cp_img
			
 
				+
			
 
				+        # crop padded image
			
 
				+        x_offset, y_offset = 0, 0
			
 
				+        if padded_img.shape[0] > target_h:
			
 
				+            y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)
			
 
				+        if padded_img.shape[1] > target_w:
			
 
				+            x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)
			
 
				+        padded_cropped_img = padded_img[
			
 
				+            y_offset: y_offset + target_h, x_offset: x_offset + target_w
			
 
				+        ]
			
 
				+
			
 
				+        # process target
			
 
				+        new_boxes = new_target["boxes"]
			
 
				+        new_labels = new_target["labels"]
			
 
				+        new_boxes[:, 0::2] = np.clip(new_boxes[:, 0::2] * cp_scale_ratio, 0, origin_w)
			
 
				+        new_boxes[:, 1::2] = np.clip(new_boxes[:, 1::2] * cp_scale_ratio, 0, origin_h)
			
 
				+        if FLIP:
			
 
				+            new_boxes[:, 0::2] = (
			
 
				+                origin_w - new_boxes[:, 0::2][:, ::-1]
			
 
				+            )
			
 
				+        new_boxes[:, 0::2] = np.clip(
			
 
				+            new_boxes[:, 0::2] - x_offset, 0, target_w
			
 
				+        )
			
 
				+        new_boxes[:, 1::2] = np.clip(
			
 
				+            new_boxes[:, 1::2] - y_offset, 0, target_h
			
 
				+        )
			
 
				+
			
 
				+        # mixup target
			
 
				+        mixup_boxes = np.concatenate([new_boxes, origin_target['boxes']], axis=0)
			
 
				+        mixup_labels = np.concatenate([new_labels, origin_target['labels']], axis=0)
			
 
				+        mixup_target = {
			
 
				+            'boxes': mixup_boxes,
			
 
				+            'labels': mixup_labels
			
 
				+        }
			
 
				+
			
 
				+        # mixup images
			
 
				+        origin_image = origin_image.astype(np.float32)
			
 
				+        origin_image = 0.5 * origin_image + 0.5 * padded_cropped_img.astype(np.float32)
			
 
				+
			
 
				+        return origin_image.astype(np.uint8), mixup_target
			
 
				+            
			
 
				+    def __call__(self, origin_image, origin_target, new_image, new_target):
			
 
				+        if self.mixup_type == "yolov5":
			
 
				+            return self.yolov5_mixup_augment(origin_image, origin_target, new_image, new_target)
			
 
				+        elif self.mixup_type == "yolox":
			
 
				+            return self.yolox_mixup_augment(origin_image, origin_target, new_image, new_target)
			
 
				+        else:
			
 
				+            raise NotImplementedError("Unknown mixup type: {}".format(self.mixup_type))
			
--- a/dataset/data_augment/yolov5_augment.py
+++ b/dataset/data_augment/yolov5_augment.py
@@ -121,225 +121,16 @@ class Albumentations(object):
 
				         return image, target
			
 
				 
			
 
				 
			
 
				-# ------------------------- Strong augmentations -------------------------
			
 
				-## YOLOv5-Mosaic
			
 
				-def yolov5_mosaic_augment(image_list, target_list, img_size, affine_params, keep_ratio=True, is_train=False):
			
 
				-    assert len(image_list) == 4
			
 
				-
			
 
				-    mosaic_img = np.ones([img_size*2, img_size*2, image_list[0].shape[2]], dtype=np.uint8) * 114
			
 
				-    # mosaic center
			
 
				-    yc, xc = [int(random.uniform(-x, 2*img_size + x)) for x in [-img_size // 2, -img_size // 2]]
			
 
				-    # yc = xc = self.img_size
			
 
				-
			
 
				-    mosaic_bboxes = []
			
 
				-    mosaic_labels = []
			
 
				-    for i in range(4):
			
 
				-        img_i, target_i = image_list[i], target_list[i]
			
 
				-        bboxes_i = target_i["boxes"]
			
 
				-        labels_i = target_i["labels"]
			
 
				-
			
 
				-        orig_h, orig_w, _ = img_i.shape
			
 
				-
			
 
				-        # resize
			
 
				-        if keep_ratio:
			
 
				-            r = img_size / max(orig_h, orig_w)
			
 
				-            if r != 1: 
			
 
				-                interp = cv2.INTER_LINEAR if (is_train or r > 1) else cv2.INTER_AREA
			
 
				-                img_i = cv2.resize(img_i, (int(orig_w * r), int(orig_h * r)), interpolation=interp)
			
 
				-        else:
			
 
				-            interp = cv2.INTER_LINEAR if is_train else cv2.INTER_AREA
			
 
				-            img_i = cv2.resize(img_i, (img_size, img_size), interpolation=interp)
			
 
				-        h, w, _ = img_i.shape
			
 
				-
			
 
				-        # place img in img4
			
 
				-        if i == 0:  # top left
			
 
				-            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
			
 
				-            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
			
 
				-        elif i == 1:  # top right
			
 
				-            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, img_size * 2), yc
			
 
				-            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
			
 
				-        elif i == 2:  # bottom left
			
 
				-            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(img_size * 2, yc + h)
			
 
				-            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
			
 
				-        elif i == 3:  # bottom right
			
 
				-            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, img_size * 2), min(img_size * 2, yc + h)
			
 
				-            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
			
 
				-
			
 
				-        mosaic_img[y1a:y2a, x1a:x2a] = img_i[y1b:y2b, x1b:x2b]
			
 
				-        padw = x1a - x1b
			
 
				-        padh = y1a - y1b
			
 
				-
			
 
				-        # labels
			
 
				-        bboxes_i_ = bboxes_i.copy()
			
 
				-        if len(bboxes_i) > 0:
			
 
				-            # a valid target, and modify it.
			
 
				-            bboxes_i_[:, 0] = (w * bboxes_i[:, 0] / orig_w + padw)
			
 
				-            bboxes_i_[:, 1] = (h * bboxes_i[:, 1] / orig_h + padh)
			
 
				-            bboxes_i_[:, 2] = (w * bboxes_i[:, 2] / orig_w + padw)
			
 
				-            bboxes_i_[:, 3] = (h * bboxes_i[:, 3] / orig_h + padh)    
			
 
				-
			
 
				-            mosaic_bboxes.append(bboxes_i_)
			
 
				-            mosaic_labels.append(labels_i)
			
 
				-
			
 
				-    if len(mosaic_bboxes) == 0:
			
 
				-        mosaic_bboxes = np.array([]).reshape(-1, 4)
			
 
				-        mosaic_labels = np.array([]).reshape(-1)
			
 
				-    else:
			
 
				-        mosaic_bboxes = np.concatenate(mosaic_bboxes)
			
 
				-        mosaic_labels = np.concatenate(mosaic_labels)
			
 
				-
			
 
				-    # clip
			
 
				-    mosaic_bboxes = mosaic_bboxes.clip(0, img_size * 2)
			
 
				-
			
 
				-    # random perspective
			
 
				-    mosaic_targets = np.concatenate([mosaic_labels[..., None], mosaic_bboxes], axis=-1)
			
 
				-    mosaic_img, mosaic_targets = random_perspective(
			
 
				-        mosaic_img,
			
 
				-        mosaic_targets,
			
 
				-        affine_params['degrees'],
			
 
				-        translate=affine_params['translate'],
			
 
				-        scale=affine_params['scale'],
			
 
				-        shear=affine_params['shear'],
			
 
				-        perspective=affine_params['perspective'],
			
 
				-        border=[-img_size//2, -img_size//2]
			
 
				-        )
			
 
				-
			
 
				-    # target
			
 
				-    mosaic_target = {
			
 
				-        "boxes": mosaic_targets[..., 1:],
			
 
				-        "labels": mosaic_targets[..., 0],
			
 
				-        "orig_size": [img_size, img_size]
			
 
				-    }
			
 
				-
			
 
				-    return mosaic_img, mosaic_target
			
 
				-
			
 
				-## YOLOv5-Mixup
			
 
				-def yolov5_mixup_augment(origin_image, origin_target, new_image, new_target):
			
 
				-    if origin_image.shape[:2] != new_image.shape[:2]:
			
 
				-        img_size = max(new_image.shape[:2])
			
 
				-        # origin_image is not a mosaic image
			
 
				-        orig_h, orig_w = origin_image.shape[:2]
			
 
				-        scale_ratio = img_size / max(orig_h, orig_w)
			
 
				-        if scale_ratio != 1: 
			
 
				-            interp = cv2.INTER_LINEAR if scale_ratio > 1 else cv2.INTER_AREA
			
 
				-            resize_size = (int(orig_w * scale_ratio), int(orig_h * scale_ratio))
			
 
				-            origin_image = cv2.resize(origin_image, resize_size, interpolation=interp)
			
 
				-
			
 
				-        # pad new image
			
 
				-        pad_origin_image = np.ones([img_size, img_size, origin_image.shape[2]], dtype=np.uint8) * 114
			
 
				-        pad_origin_image[:resize_size[1], :resize_size[0]] = origin_image
			
 
				-        origin_image = pad_origin_image.copy()
			
 
				-        del pad_origin_image
			
 
				-
			
 
				-    r = np.random.beta(32.0, 32.0)  # mixup ratio, alpha=beta=32.0
			
 
				-    mixup_image = r * origin_image.astype(np.float32) + \
			
 
				-                  (1.0 - r)* new_image.astype(np.float32)
			
 
				-    mixup_image = mixup_image.astype(np.uint8)
			
 
				-    
			
 
				-    cls_labels = new_target["labels"].copy()
			
 
				-    box_labels = new_target["boxes"].copy()
			
 
				-
			
 
				-    mixup_bboxes = np.concatenate([origin_target["boxes"], box_labels], axis=0)
			
 
				-    mixup_labels = np.concatenate([origin_target["labels"], cls_labels], axis=0)
			
 
				-
			
 
				-    mixup_target = {
			
 
				-        "boxes": mixup_bboxes,
			
 
				-        "labels": mixup_labels,
			
 
				-        'orig_size': mixup_image.shape[:2]
			
 
				-    }
			
 
				-    
			
 
				-    return mixup_image, mixup_target
			
 
				-    
			
 
				-## YOLOX-Mixup
			
 
				-def yolox_mixup_augment(origin_img, origin_target, new_img, new_target, img_size, mixup_scale):
			
 
				-    jit_factor = random.uniform(*mixup_scale)
			
 
				-    FLIP = random.uniform(0, 1) > 0.5
			
 
				-
			
 
				-    # resize new image
			
 
				-    orig_h, orig_w = new_img.shape[:2]
			
 
				-    cp_scale_ratio = img_size / max(orig_h, orig_w)
			
 
				-    if cp_scale_ratio != 1: 
			
 
				-        interp = cv2.INTER_LINEAR if cp_scale_ratio > 1 else cv2.INTER_AREA
			
 
				-        resized_new_img = cv2.resize(
			
 
				-            new_img, (int(orig_w * cp_scale_ratio), int(orig_h * cp_scale_ratio)), interpolation=interp)
			
 
				-    else:
			
 
				-        resized_new_img = new_img
			
 
				-
			
 
				-    # pad new image
			
 
				-    cp_img = np.ones([img_size, img_size, new_img.shape[2]], dtype=np.uint8) * 114
			
 
				-    new_shape = (resized_new_img.shape[1], resized_new_img.shape[0])
			
 
				-    cp_img[:new_shape[1], :new_shape[0]] = resized_new_img
			
 
				-
			
 
				-    # resize padded new image
			
 
				-    cp_img_h, cp_img_w = cp_img.shape[:2]
			
 
				-    cp_new_shape = (int(cp_img_w * jit_factor),
			
 
				-                    int(cp_img_h * jit_factor))
			
 
				-    cp_img = cv2.resize(cp_img, (cp_new_shape[0], cp_new_shape[1]))
			
 
				-    cp_scale_ratio *= jit_factor
			
 
				-
			
 
				-    # flip new image
			
 
				-    if FLIP:
			
 
				-        cp_img = cp_img[:, ::-1, :]
			
 
				-
			
 
				-    # pad image
			
 
				-    origin_h, origin_w = cp_img.shape[:2]
			
 
				-    target_h, target_w = origin_img.shape[:2]
			
 
				-    padded_img = np.zeros(
			
 
				-        (max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8
			
 
				-    )
			
 
				-    padded_img[:origin_h, :origin_w] = cp_img
			
 
				-
			
 
				-    # crop padded image
			
 
				-    x_offset, y_offset = 0, 0
			
 
				-    if padded_img.shape[0] > target_h:
			
 
				-        y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)
			
 
				-    if padded_img.shape[1] > target_w:
			
 
				-        x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)
			
 
				-    padded_cropped_img = padded_img[
			
 
				-        y_offset: y_offset + target_h, x_offset: x_offset + target_w
			
 
				-    ]
			
 
				-
			
 
				-    # process target
			
 
				-    new_boxes = new_target["boxes"]
			
 
				-    new_labels = new_target["labels"]
			
 
				-    new_boxes[:, 0::2] = np.clip(new_boxes[:, 0::2] * cp_scale_ratio, 0, origin_w)
			
 
				-    new_boxes[:, 1::2] = np.clip(new_boxes[:, 1::2] * cp_scale_ratio, 0, origin_h)
			
 
				-    if FLIP:
			
 
				-        new_boxes[:, 0::2] = (
			
 
				-            origin_w - new_boxes[:, 0::2][:, ::-1]
			
 
				-        )
			
 
				-    new_boxes[:, 0::2] = np.clip(
			
 
				-        new_boxes[:, 0::2] - x_offset, 0, target_w
			
 
				-    )
			
 
				-    new_boxes[:, 1::2] = np.clip(
			
 
				-        new_boxes[:, 1::2] - y_offset, 0, target_h
			
 
				-    )
			
 
				-
			
 
				-    # mixup target
			
 
				-    mixup_boxes = np.concatenate([new_boxes, origin_target['boxes']], axis=0)
			
 
				-    mixup_labels = np.concatenate([new_labels, origin_target['labels']], axis=0)
			
 
				-    mixup_target = {
			
 
				-        'boxes': mixup_boxes,
			
 
				-        'labels': mixup_labels
			
 
				-    }
			
 
				-
			
 
				-    # mixup images
			
 
				-    origin_img = origin_img.astype(np.float32)
			
 
				-    origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(np.float32)
			
 
				-
			
 
				-    return origin_img.astype(np.uint8), mixup_target
			
 
				-        
			
 
				-
			
 
				 # ------------------------- Preprocessers -------------------------
			
 
				 ## YOLOv5-style Transform for Train
			
 
				 class YOLOv5Augmentation(object):
			
 
				-    def __init__(self, img_size=640, trans_config=None, use_ablu=False):
			
 
				+    def __init__(self, img_size=640, affine_params=None, use_ablu=False):
			
 
				         # Basic parameters
			
 
				         self.img_size = img_size
			
 
				         self.pixel_mean = [0., 0., 0.]
			
 
				         self.pixel_std  = [255., 255., 255.]
			
 
				         self.color_format = 'bgr'
			
 
				-        self.trans_config = trans_config
			
 
				+        self.affine_params = affine_params
			
 
				         # Albumentations
			
 
				         self.ablu_trans = Albumentations(img_size) if use_ablu else None
			
 
				 
			
@@ -367,9 +158,10 @@ class YOLOv5Augmentation(object):
 
				             img, target = self.ablu_trans(img, target)
			
 
				 
			
 
				         # --------------- HSV augmentations ---------------
			
 
				-        augment_hsv(img, hgain=self.trans_config['hsv_h'], 
			
 
				-                    sgain=self.trans_config['hsv_s'], 
			
 
				-                    vgain=self.trans_config['hsv_v'])
			
 
				+        augment_hsv(img,
			
 
				+                    hgain=self.affine_params['hsv_h'], 
			
 
				+                    sgain=self.affine_params['hsv_s'], 
			
 
				+                    vgain=self.affine_params['hsv_v'])
			
 
				         
			
 
				         # --------------- Spatial augmentations ---------------
			
 
				         ## Random perspective
			
@@ -384,11 +176,11 @@ class YOLOv5Augmentation(object):
 
				                 (target['labels'][..., None], target['boxes']), axis=-1)
			
 
				             img, target_ = random_perspective(
			
 
				                 img, target_,
			
 
				-                degrees=self.trans_config['degrees'],
			
 
				-                translate=self.trans_config['translate'],
			
 
				-                scale=self.trans_config['scale'],
			
 
				-                shear=self.trans_config['shear'],
			
 
				-                perspective=self.trans_config['perspective']
			
 
				+                degrees     = self.affine_params['degrees'],
			
 
				+                translate   = self.affine_params['translate'],
			
 
				+                scale       = self.affine_params['scale'],
			
 
				+                shear       = self.affine_params['shear'],
			
 
				+                perspective = self.affine_params['perspective']
			
 
				                 )
			
 
				             target['boxes'] = target_[..., 1:]
			
 
				             target['labels'] = target_[..., 0]
			
--- a/dataset/voc.py
+++ b/dataset/voc.py
@@ -3,13 +3,12 @@ import random
 
				 import numpy as np
			
 
				 import os.path as osp
			
 
				 import xml.etree.ElementTree as ET
			
 
				-
			
 
				-import torch
			
 
				 import torch.utils.data as data
			
 
				+
			
 
				 try:
			
 
				-    from .data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
			
 
				+    from .data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				 except:
			
 
				-    from data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
			
 
				+    from  data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				 
			
 
				 
			
 
				 # VOC class names
			
@@ -90,10 +89,19 @@ class VOCDataset(data.Dataset):
 
				                 self.ids.append((rootpath, line.strip()))
			
 
				         self.dataset_size = len(self.ids)
			
 
				         # ----------- Transform parameters -----------
			
 
				-        self.transform = transform
			
 
				-        self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
			
 
				-        self.mixup_prob = trans_config['mixup_prob'] if trans_config else 0.0
			
 
				         self.trans_config = trans_config
			
 
				+        self.transform = transform
			
 
				+        # ----------- Strong augmentation -----------
			
 
				+        if is_train:
			
 
				+            self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
			
 
				+            self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
			
 
				+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
			
 
				+            self.mixup_augment  = MixupAugment(img_size, trans_config)
			
 
				+        else:
			
 
				+            self.mosaic_prob = 0.0
			
 
				+            self.mixup_prob  = 0.0
			
 
				+            self.mosaic_augment = None
			
 
				+            self.mixup_augment  = None
			
 
				         print('==============================')
			
 
				         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
			
 
				         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
			
@@ -147,13 +155,14 @@ class VOCDataset(data.Dataset):
 
				 
			
 
				     # ------------ Mosaic & Mixup ------------
			
 
				     def load_mosaic(self, index):
			
 
				-        # load 4x mosaic image
			
 
				+        # ------------ Prepare 4 indexes of images ------------
			
 
				+        ## Load 4x mosaic image
			
 
				         index_list = np.arange(index).tolist() + np.arange(index+1, len(self.ids)).tolist()
			
 
				         id1 = index
			
 
				         id2, id3, id4 = random.sample(index_list, 3)
			
 
				         indexs = [id1, id2, id3, id4]
			
 
				 
			
 
				-        # load images and targets
			
 
				+        ## Load images and targets
			
 
				         image_list = []
			
 
				         target_list = []
			
 
				         for index in indexs:
			
@@ -161,26 +170,22 @@ class VOCDataset(data.Dataset):
 
				             image_list.append(img_i)
			
 
				             target_list.append(target_i)
			
 
				 
			
 
				-        # Mosaic
			
 
				-        if self.trans_config['mosaic_type'] == 'yolov5_mosaic':
			
 
				-            image, target = yolov5_mosaic_augment(
			
 
				-                image_list, target_list, self.img_size, self.trans_config, self.trans_config['mosaic_keep_ratio'], self.is_train)
			
 
				+        # ------------ Mosaic augmentation ------------
			
 
				+        image, target = self.mosaic_augment(image_list, target_list)
			
 
				 
			
 
				         return image, target
			
 
				 
			
 
				     def load_mixup(self, origin_image, origin_target):
			
 
				-        # YOLOv5 type Mixup
			
 
				-        if self.trans_config['mixup_type'] == 'yolov5_mixup':
			
 
				+        # ------------ Load a new image & target ------------
			
 
				+        if self.mixup_augment.mixup_type == 'yolov5':
			
 
				             new_index = np.random.randint(0, len(self.ids))
			
 
				             new_image, new_target = self.load_mosaic(new_index)
			
 
				-            image, target = yolov5_mixup_augment(
			
 
				-                origin_image, origin_target, new_image, new_target)
			
 
				-        # YOLOX type Mixup
			
 
				-        elif self.trans_config['mixup_type'] == 'yolox_mixup':
			
 
				+        elif self.mixup_augment.mixup_type == 'yolox':
			
 
				             new_index = np.random.randint(0, len(self.ids))
			
 
				             new_image, new_target = self.load_image_target(new_index)
			
 
				-            image, target = yolox_mixup_augment(
			
 
				-                origin_image, origin_target, new_image, new_target, self.img_size, self.trans_config['mixup_scale'])
			
 
				+            
			
 
				+        # ------------ Mixup augmentation ------------
			
 
				+        image, target = self.mixup_augment(origin_image, origin_target, new_image, new_target)
			
 
				 
			
 
				         return image, target
			
 
				     
			
@@ -275,22 +280,24 @@ if __name__ == "__main__":
 
				         'aug_type': args.aug_type,    # optional: ssd, yolov5
			
 
				         'pixel_mean': [123.675, 116.28, 103.53],
			
 
				         'pixel_std':  [58.395, 57.12, 57.375],
			
 
				-        # Basic Augment
			
 
				-        'degrees': 0.0,
			
 
				-        'translate': 0.2,
			
 
				-        'scale': [0.1, 2.0],
			
 
				-        'shear': 0.0,
			
 
				-        'perspective': 0.0,
			
 
				-        'hsv_h': 0.015,
			
 
				-        'hsv_s': 0.7,
			
 
				-        'hsv_v': 0.4,
			
 
				         'use_ablu': True,
			
 
				+        # Basic Augment
			
 
				+        'affine_params': {
			
 
				+            'degrees': 0.0,
			
 
				+            'translate': 0.2,
			
 
				+            'scale': [0.1, 2.0],
			
 
				+            'shear': 0.0,
			
 
				+            'perspective': 0.0,
			
 
				+            'hsv_h': 0.015,
			
 
				+            'hsv_s': 0.7,
			
 
				+            'hsv_v': 0.4,
			
 
				+        },
			
 
				         # Mosaic & Mixup
			
 
				+        'mosaic_keep_ratio': False,
			
 
				         'mosaic_prob': args.mosaic,
			
 
				         'mixup_prob': args.mixup,
			
 
				-        'mosaic_type': 'yolov5_mosaic',
			
 
				-        'mixup_type': args.mixup_type,   # optional: yolov5_mixup, yolox_mixup
			
 
				-        'mosaic_keep_ratio': False,
			
 
				+        'mosaic_type': 'yolov5',
			
 
				+        'mixup_type':  'yolov5',
			
 
				         'mixup_scale': [0.5, 1.5]
			
 
				     }
			
 
				     transform, trans_cfg = build_transform(args, trans_config, 32, args.is_train)
			
--- a/dataset/widerface.py
+++ b/dataset/widerface.py
@@ -12,9 +12,9 @@ except:
 
				     print("It seems that the COCOAPI is not installed.")
			
 
				 
			
 
				 try:
			
 
				-    from .data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
			
 
				+    from .data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				 except:
			
 
				-    from data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
			
 
				+    from  data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				 
			
 
				 
			
 
				 widerface_class_labels = ('face',)
			
@@ -44,12 +44,20 @@ class WiderFaceDataset(Dataset):
 
				         self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file))
			
 
				         self.ids = self.coco.getImgIds()
			
 
				         self.class_ids = sorted(self.coco.getCatIds())
			
 
				-
			
 
				         # ----------- Transform parameters -----------
			
 
				-        self.transform = transform
			
 
				-        self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
			
 
				-        self.mixup_prob = trans_config['mixup_prob'] if trans_config else 0.0
			
 
				         self.trans_config = trans_config
			
 
				+        self.transform = transform
			
 
				+        # ----------- Strong augmentation -----------
			
 
				+        if is_train:
			
 
				+            self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
			
 
				+            self.mixup_prob  = trans_config['mixup_prob']  if trans_config else 0.0
			
 
				+            self.mosaic_augment = MosaicAugment(img_size, trans_config, is_train)
			
 
				+            self.mixup_augment  = MixupAugment(img_size, trans_config)
			
 
				+        else:
			
 
				+            self.mosaic_prob = 0.0
			
 
				+            self.mixup_prob  = 0.0
			
 
				+            self.mosaic_augment = None
			
 
				+            self.mixup_augment  = None
			
 
				         print('==============================')
			
 
				         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
			
 
				         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
			
@@ -64,13 +72,14 @@ class WiderFaceDataset(Dataset):
 
				 
			
 
				     # ------------ Mosaic & Mixup ------------
			
 
				     def load_mosaic(self, index):
			
 
				-        # load 4x mosaic image
			
 
				+        # ------------ Prepare 4 indexes of images ------------
			
 
				+        ## Load 4x mosaic image
			
 
				         index_list = np.arange(index).tolist() + np.arange(index+1, len(self.ids)).tolist()
			
 
				         id1 = index
			
 
				         id2, id3, id4 = random.sample(index_list, 3)
			
 
				         indexs = [id1, id2, id3, id4]
			
 
				 
			
 
				-        # load images and targets
			
 
				+        ## Load images and targets
			
 
				         image_list = []
			
 
				         target_list = []
			
 
				         for index in indexs:
			
@@ -78,26 +87,22 @@ class WiderFaceDataset(Dataset):
 
				             image_list.append(img_i)
			
 
				             target_list.append(target_i)
			
 
				 
			
 
				-        # Mosaic
			
 
				-        if self.trans_config['mosaic_type'] == 'yolov5_mosaic':
			
 
				-            image, target = yolov5_mosaic_augment(
			
 
				-                image_list, target_list, self.img_size, self.trans_config, self.trans_config['mosaic_keep_ratio'], self.is_train)
			
 
				+        # ------------ Mosaic augmentation ------------
			
 
				+        image, target = self.mosaic_augment(image_list, target_list)
			
 
				 
			
 
				         return image, target
			
 
				 
			
 
				     def load_mixup(self, origin_image, origin_target):
			
 
				-        # YOLOv5 type Mixup
			
 
				-        if self.trans_config['mixup_type'] == 'yolov5_mixup':
			
 
				+        # ------------ Load a new image & target ------------
			
 
				+        if self.mixup_augment.mixup_type == 'yolov5':
			
 
				             new_index = np.random.randint(0, len(self.ids))
			
 
				             new_image, new_target = self.load_mosaic(new_index)
			
 
				-            image, target = yolov5_mixup_augment(
			
 
				-                origin_image, origin_target, new_image, new_target)
			
 
				-        # YOLOX type Mixup
			
 
				-        elif self.trans_config['mixup_type'] == 'yolox_mixup':
			
 
				+        elif self.mixup_augment.mixup_type == 'yolox':
			
 
				             new_index = np.random.randint(0, len(self.ids))
			
 
				             new_image, new_target = self.load_image_target(new_index)
			
 
				-            image, target = yolox_mixup_augment(
			
 
				-                origin_image, origin_target, new_image, new_target, self.img_size, self.trans_config['mixup_scale'])
			
 
				+            
			
 
				+        # ------------ Mixup augmentation ------------
			
 
				+        image, target = self.mixup_augment(origin_image, origin_target, new_image, new_target)
			
 
				 
			
 
				         return image, target
			
 
				     
			
--- a/engine.py
+++ b/engine.py
@@ -1140,7 +1140,7 @@ class RTDetrTrainer(object):
 
				         self.args.fp16 = False
			
 
				         # weak augmentatino stage
			
 
				         self.second_stage = False
			
 
				-        self.second_stage_epoch = -1
			
 
				+        self.second_stage_epoch = 5
			
 
				         # path to save model
			
 
				         self.path_to_save = os.path.join(args.save_folder, args.dataset, args.model)
			
 
				         os.makedirs(self.path_to_save, exist_ok=True)
			
@@ -1160,8 +1160,6 @@ class RTDetrTrainer(object):
 
				             args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
			
 
				         self.val_transform, _ = build_transform(
			
 
				             args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=False)
			
 
				-        if self.trans_cfg["mosaic_prob"] > 0.5:
			
 
				-            self.second_stage_epoch = 5
			
 
				 
			
 
				         # ---------------------------- Build Dataset & Dataloader ----------------------------
			
 
				         self.dataset, self.dataset_info = build_dataset(args, self.data_cfg, self.trans_cfg, self.train_transform, is_train=True)
			
@@ -1488,7 +1486,6 @@ class RTDetrTrainer(object):
 
				         self.train_transform, self.trans_cfg = build_transform(
			
 
				             args=self.args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
			
 
				         
			
 
				-        self.train_transform.set_weak_augment()
			
 
				         self.train_loader.dataset.transform = self.train_transform
			
 
				 
			
 
				 ## Real-time PlainDETR Trainer
			
--- a/models/detectors/rtcdet/loss.py
+++ b/models/detectors/rtcdet/loss.py
@@ -82,7 +82,7 @@ class Criterion(object):
 
				                                  'labels': [...], 
			
 
				                                  'orig_size': ...}, ...]
			
 
				         """
			
 
				-        bs, num_anchors = outputs['pred_cls'][0].shape[:2]
			
 
				+        bs = outputs['pred_cls'][0].shape[0]
			
 
				         device = outputs['pred_cls'][0].device
			
 
				         anchors = outputs['anchors']
			
 
				         fpn_strides = outputs['strides']
			
--- a/models/detectors/rtdetr/basic_modules/fpn.py
+++ b/models/detectors/rtdetr/basic_modules/fpn.py
@@ -66,8 +66,6 @@ class HybridEncoder(nn.Module):
 
				         self.input_proj_2 = BasicConv(c4, self.out_dim, kernel_size=1, act_type=None, norm_type=norm_type)
			
 
				         self.input_proj_3 = BasicConv(c3, self.out_dim, kernel_size=1, act_type=None, norm_type=norm_type)
			
 
				 
			
 
				-        # ---------------- Downsample ----------------
			
 
				-
			
 
				         # ---------------- Transformer Encoder ----------------
			
 
				         self.transformer_encoder = TransformerEncoder(d_model        = self.out_dim,
			
 
				                                                       num_heads      = num_heads,
			
--- a/models/detectors/rtdetr/basic_modules/transformer.py
+++ b/models/detectors/rtdetr/basic_modules/transformer.py
@@ -316,7 +316,7 @@ class TransformerEncoder(nn.Module):
 
				         # -------- Transformer encoder --------
			
 
				         channels, fmp_h, fmp_w = src.shape[1:]
			
 
				         # [B, C, H, W] -> [B, N, C], N=HxW
			
 
				-        src_flatten = src.flatten(2).permute(0, 2, 1)
			
 
				+        src_flatten = src.flatten(2).permute(0, 2, 1).contiguous()
			
 
				         memory = src_flatten
			
 
				 
			
 
				         # PosEmbed: [1, N, C]
			
@@ -328,7 +328,8 @@ class TransformerEncoder(nn.Module):
 
				             memory = encoder(memory, pos_embed=pos_embed)
			
 
				 
			
 
				         # Output: [B, N, C] -> [B, C, N] -> [B, C, H, W]
			
 
				-        src = memory.permute(0, 2, 1).reshape([-1, channels, fmp_h, fmp_w])
			
 
				+        src = memory.permute(0, 2, 1).contiguous()
			
 
				+        src = src.view([-1, channels, fmp_h, fmp_w])
			
 
				 
			
 
				         return src
			
 
				 
			
--- a/models/detectors/rtdetr/rtdetr_decoder.py
+++ b/models/detectors/rtdetr/rtdetr_decoder.py
@@ -201,7 +201,7 @@ class RTDETRTransformer(nn.Module):
 
				             # [l], start index of each level
			
 
				             level_start_index.append(h * w + level_start_index[-1])
			
 
				             # [B, C, H, W] -> [B, N, C], N=HxW
			
 
				-            feat_flatten.append(feat.flatten(2).permute(0, 2, 1))
			
 
				+            feat_flatten.append(feat.flatten(2).permute(0, 2, 1).contiguous())
			
 
				 
			
 
				         # [B, N, C], N = N_0 + N_1 + ...
			
 
				         feat_flatten = torch.cat(feat_flatten, dim=1)