release YOLOv8-s

yjh0410 · 1 year ago · parent commit 5183498321

+ 3 - 6
config/__init__.py

@@ -33,8 +33,7 @@ from .data_config.transform_config import (
     # SSD-Style
     ssd_trans_config,
     # RT-DETR style
-    rtdetr_s_trans_config,
-    rtdetr_l_trans_config,
+    rtdetr_base_trans_config,
 )
 
 def build_trans_config(trans_config='ssd'):
@@ -74,10 +73,8 @@ def build_trans_config(trans_config='ssd'):
         cfg = yolox_x_trans_config
 
     # RT-DETR style
-    elif trans_config == 'rtdetr_s':
-        cfg = rtdetr_s_trans_config
-    elif trans_config == 'rtdetr_l':
-        cfg = rtdetr_l_trans_config
+    elif trans_config == 'rtdetr_base':
+        cfg = rtdetr_base_trans_config
 
     print('Transform Config: {} \n'.format(cfg))
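The two RT-DETR transform presets collapse into a single entry, so every RT-DETR variant now resolves to the same augmentation recipe. A minimal usage sketch of the lookup after this change, assuming the script is run from the repository root so the config package shown above is importable:

# Sketch: resolving the unified RT-DETR transform preset.
from config import build_trans_config

trans_cfg = build_trans_config('rtdetr_base')  # prints the config and should return rtdetr_base_trans_config
print(trans_cfg['aug_type'])                   # expected: 'rtdetr'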
 

+ 1 - 26
config/data_config/transform_config.py

@@ -296,32 +296,7 @@ ssd_trans_config = {
 
 
 # ----------------------- SSD-Style Transform -----------------------
-rtdetr_s_trans_config = {
-    'aug_type': 'rtdetr',
-    'use_ablu': True,
-    'pixel_mean': [123.675, 116.28, 103.53],  # IN-1K statistics
-    'pixel_std':  [58.395, 57.12, 57.375],    # IN-1K statistics
-    # Basic Augment
-    'affine_params': {
-        'degrees': 0.0,
-        'translate': 0.2,
-        'scale': [0.1, 2.0],
-        'shear': 0.0,
-        'perspective': 0.0,
-        'hsv_h': 0.015,
-        'hsv_s': 0.7,
-        'hsv_v': 0.4,
-    },
-    # Mosaic & Mixup
-    'mosaic_keep_ratio': False,
-    'mosaic_prob': 0.0,
-    'mixup_prob':  0.0,
-    'mosaic_type': 'yolov5',
-    'mixup_type':  'yolov5',
-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup
-}
-
-rtdetr_l_trans_config = {
+rtdetr_base_trans_config = {
     'aug_type': 'rtdetr',
     'use_ablu': True,
     'pixel_mean': [123.675, 116.28, 103.53],  # IN-1K statistics
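The surviving preset normalizes with ImageNet-1K statistics (the usual torchvision means/stds of 0.485/0.456/0.406 and 0.229/0.224/0.225 expressed on the 0-255 scale) rather than a plain /255 scaling. A quick sketch of what that normalization amounts to, assuming images arrive as HWC RGB uint8 arrays (the channel order is an assumption, not shown in this hunk):

import numpy as np

pixel_mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)  # IN-1K mean, 0-255 scale
pixel_std  = np.array([58.395, 57.12, 57.375], dtype=np.float32)    # IN-1K std,  0-255 scale

def normalize(image_u8: np.ndarray) -> np.ndarray:
    """Sketch: per-channel (x - mean) / std, assuming an HWC RGB uint8 input."""
    return (image_u8.astype(np.float32) - pixel_mean) / pixel_std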

+ 3 - 3
config/model_config/rtdetr_config.py

@@ -53,7 +53,7 @@ rtdetr_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'rtdetr_s',
+        'trans_type': 'rtdetr_base',
         # ---------------- Train config ----------------
         'trainer_type': 'rtdetr',
     },
@@ -110,7 +110,7 @@ rtdetr_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'rtdetr_l',
+        'trans_type': 'rtdetr_base',
         # ---------------- Train config ----------------
         'trainer_type': 'rtdetr',
     },
@@ -167,7 +167,7 @@ rtdetr_cfg = {
         # ---------------- Train config ----------------
         ## input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'rtdetr_l',
+        'trans_type': 'rtdetr_base',
         # ---------------- Train config ----------------
         'trainer_type': 'rtdetr',
     },

+ 2 - 2
config/model_config/yolov3_config.py

@@ -35,7 +35,7 @@ yolov3_cfg = {
         # ---------------- Train config ----------------
         ## input
         'trans_type': 'yolov5_l',
-        'multi_scale': [0.5, 1.0],
+        'multi_scale': [0.5, 1.25],  # 320 -> 800
         # ---------------- Assignment config ----------------
         ## matcher
         'iou_thresh': 0.5,
@@ -82,7 +82,7 @@ yolov3_cfg = {
         # ---------------- Train config ----------------
         ## input
         'trans_type': 'yolov5_n',
-        'multi_scale': [0.5, 1.0],
+        'multi_scale': [0.5, 1.25],  # 320 -> 800
         # ---------------- Assignment config ----------------
         ## matcher
         'iou_thresh': 0.5,

+ 2 - 2
config/model_config/yolov4_config.py

@@ -35,7 +35,7 @@ yolov4_cfg = {
         # ---------------- Train config ----------------
         ## input
         'trans_type': 'yolov5_l',
-        'multi_scale': [0.5, 1.0],
+        'multi_scale': [0.5, 1.25],  # 320 -> 800
         # ---------------- Assignment config ----------------
         ## matcher
         'iou_thresh': 0.5,
@@ -82,7 +82,7 @@ yolov4_cfg = {
         # ---------------- Train config ----------------
         ## input
         'trans_type': 'yolov5_n',
-        'multi_scale': [0.5, 1.0],
+        'multi_scale': [0.5, 1.25],  # 320 -> 800
         # ---------------- Assignment config ----------------
         ## matcher
         'iou_thresh': 0.5,
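The widened multi_scale range, applied identically to the RT-DETR, YOLOv3 and YOLOv4 configs in this commit, matches the "320 -> 800" comment: with a 640 base resolution, the two factors give 640 * 0.5 = 320 and 640 * 1.25 = 800. A sketch of how such a range is typically sampled; the 640 base size and the 32-pixel rounding are assumptions (32 is also the value passed to build_transform in dataset/coco.py below, presumably the maximum stride):

import random

def sample_train_size(base_size: int = 640, multi_scale=(0.5, 1.25), stride: int = 32) -> int:
    """Sketch: pick a training resolution from the multi-scale range,
    rounded to a multiple of the network stride."""
    low  = int(base_size * multi_scale[0])   # 640 * 0.5  = 320
    high = int(base_size * multi_scale[1])   # 640 * 1.25 = 800
    return random.randint(low // stride, high // stride) * stride

print(sample_train_size())  # some multiple of 32 in [320, 800]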

+ 16 - 14
dataset/coco.py

@@ -272,24 +272,26 @@ if __name__ == "__main__":
 
     trans_config = {
         'aug_type': args.aug_type,    # optional: ssd, yolov5
-        'pixel_mean': [0., 0., 0.],
-        'pixel_std':  [255., 255., 255.],
-        # Basic Augment
-        'degrees': 0.0,
-        'translate': 0.2,
-        'scale': [0.1, 2.0],
-        'shear': 0.0,
-        'perspective': 0.0,
-        'hsv_h': 0.015,
-        'hsv_s': 0.7,
-        'hsv_v': 0.4,
+        'pixel_mean': [123.675, 116.28, 103.53],
+        'pixel_std':  [58.395, 57.12, 57.375],
         'use_ablu': True,
+        # Basic Augment
+        'affine_params': {
+            'degrees': 0.0,
+            'translate': 0.2,
+            'scale': [0.1, 2.0],
+            'shear': 0.0,
+            'perspective': 0.0,
+            'hsv_h': 0.015,
+            'hsv_s': 0.7,
+            'hsv_v': 0.4,
+        },
         # Mosaic & Mixup
+        'mosaic_keep_ratio': False,
         'mosaic_prob': args.mosaic,
         'mixup_prob': args.mixup,
-        'mosaic_type': 'yolov5_mosaic',
-        'mixup_type': args.mixup_type,   # optional: yolov5_mixup, yolox_mixup
-        'mosaic_keep_ratio': False,
+        'mosaic_type': 'yolov5',
+        'mixup_type':  'yolov5',
         'mixup_scale': [0.5, 1.5]
     }
     transform, trans_cfg = build_transform(args, trans_config, 32, args.is_train)
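Note the schema change for downstream code: the affine/HSV knobs that used to sit at the top level of the transform config now live under a nested 'affine_params' dict, and the normalization switches to the IN-1K statistics. A minimal before/after sketch of the access pattern (variable names other than those shown above are illustrative):

# Old schema: flat keys on the transform config.
degrees = trans_config['degrees']
hsv_h   = trans_config['hsv_h']

# New schema (this commit): the same knobs nested under 'affine_params'.
affine  = trans_config['affine_params']
degrees = affine['degrees']
hsv_h   = affine['hsv_h']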

+ 1 - 0
demo.py

@@ -277,6 +277,7 @@ def run():
     model_cfg = build_model_config(args)
     trans_cfg = build_trans_config(model_cfg['trans_type'])
     data_cfg  = build_dataset_config(args)
+    
     ## Data info
     num_classes = data_cfg['num_classes']
     class_names = data_cfg['class_names']

+ 1 - 0
models/detectors/rtdetr/README.md

@@ -10,6 +10,7 @@ This model is not yet complete.
 
 - For the backbone of the image encoder, we use the IN-1K classification pretrained weight from torchvision, which is different from the official
 RT-DETR. It might be hard to train RT-DETR from scratch without IN-1K pretrained weight.
+- For the HybridEncoder, we use the C2f of YOLOv8 rather than the CSPRepLayer.
 - For training, we train RT-DETR series with 6x (~72 epochs) schedule on COCO and use ModelEMA trick. We close the fp16 training trick.
 - For data augmentation, we use the `color jitter`, `random hflip`, `random crop`, and multi-scale training trick.
 - For optimizer, we use AdamW with weight decay 0.0001 and base per image lr 0.0001 / 16.
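As I read the "base per image lr 0.0001 / 16" convention in the line above, the actual learning rate is scaled linearly with the total batch size. A quick sketch of that scaling (the helper itself is illustrative, not code from this repository):

def scaled_lr(total_batch_size: int, base_lr: float = 0.0001, reference_batch: int = 16) -> float:
    """Linear LR scaling: the learning rate grows in proportion to the total batch size."""
    return base_lr * total_batch_size / reference_batch

print(scaled_lr(16))  # 0.0001  (reference setting)
print(scaled_lr(64))  # 0.0004  (4x larger batch -> 4x larger lr)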

+ 1 - 1
models/detectors/yolov8/README.md

@@ -3,7 +3,7 @@
 |   Model   |  Batch | Scale | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
 |-----------|--------|-------|------------------------|-------------------|-------------------|--------------------|--------|
 | YOLOv8-N  | 8xb16  |  640  |          37.0          |        52.9       |        8.8        |         3.2        | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov8_n_coco.pth) |
-| YOLOv8-S  | 8xb16  |  640  |                        |                   |                   |                    |  |
+| YOLOv8-S  | 8xb16  |  640  |          43.5          |        60.4       |       28.8        |         11.2       | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov8_s_coco.pth) |
 | YOLOv8-M  | 8xb16  |  640  |                        |                   |                   |                    |  |
 | YOLOv8-L  | 8xb16  |  640  |          50.7          |        68.3       |       165.7       |         43.7       | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov8_l_coco.pth) |
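The newly released YOLOv8-S weights can be fetched from the ckpt link in the table. A short loading sketch; whether the file stores a raw state dict or wraps it under a key such as 'model' is an assumption, so adjust to the actual checkpoint layout:

import torch

# Assumes the checkpoint was downloaded from the release link above.
ckpt = torch.load("yolov8_s_coco.pth", map_location="cpu")

# Layout assumption: some checkpoints store the weights directly, others nest them.
state_dict = ckpt["model"] if isinstance(ckpt, dict) and "model" in ckpt else ckpt
print(list(state_dict)[:5])  # peek at the first few parameter names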
 

+ 3 - 3
models/detectors/yolov8/yolov8_backbone.py

@@ -105,9 +105,9 @@ if __name__ == '__main__':
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
-        'width': 1.0,
-        'depth': 1.0,
-        'ratio': 1.0,
+        'width': 0.25,
+        'depth': 0.34,
+        'ratio': 2.0,
     }
     model, feats = build_backbone(cfg)
     x = torch.randn(1, 3, 640, 640)

+ 5 - 5
models/detectors/yolov8/yolov8_pafpn.py

@@ -129,12 +129,12 @@ if __name__ == '__main__':
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
-        'width': 1.0,
-        'depth': 1.0,
-        'ratio': 1.0,
+        'width': 0.25,
+        'depth': 0.34,
+        'ratio': 2.0,
     }
-    model = build_fpn(cfg, in_dims=[256, 512, 512])
-    pyramid_feats = [torch.randn(1, 256, 80, 80), torch.randn(1, 512, 40, 40), torch.randn(1, 512, 20, 20)]
+    model = build_fpn(cfg, in_dims=[64, 128, 256])
+    pyramid_feats = [torch.randn(1, 64, 80, 80), torch.randn(1, 128, 40, 40), torch.randn(1, 256, 20, 20)]
     t0 = time.time()
     outputs = model(pyramid_feats)
     t1 = time.time()
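The new test inputs in the backbone and FPN scripts are consistent with each other: with width 0.25 and ratio 2.0, the last three backbone stages come out at 64, 128 and 256 channels, which is exactly the in_dims fed to the FPN here. A sketch of that arithmetic; the 256/512/512 base widths are an assumption inferred from the old in_dims above:

def scaled_channels(width: float, ratio: float, base=(256, 512, 512)):
    """Sketch of YOLOv8-style width scaling: the final stage is additionally
    widened by `ratio` before the global `width` factor is applied."""
    c3, c4, c5 = base
    return (round(c3 * width), round(c4 * width), round(c5 * ratio * width))

print(scaled_channels(width=1.0,  ratio=1.0))   # (256, 512, 512) -- old test config
print(scaled_channels(width=0.25, ratio=2.0))   # (64, 128, 256)  -- new test config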

+ 6 - 2
models/detectors/yolov8/yolov8_pred.py

@@ -111,9 +111,13 @@ class MultiLevelPredLayer(nn.Module):
                                   for level in range(num_levels)
                                   ])
         ## proj conv
-        proj_init = torch.arange(reg_max, dtype=torch.float)
         self.proj_conv = nn.Conv2d(self.reg_max, 1, kernel_size=1, bias=False).requires_grad_(False)
-        self.proj_conv.weight.data[:] = nn.Parameter(proj_init.view([1, reg_max, 1, 1]))
+
+        self._reset_parameters()
+
+    def _reset_parameters(self):
+        proj_init = torch.arange(self.reg_max, dtype=torch.float)
+        self.proj_conv.weight.data[:] = nn.Parameter(proj_init.view([1, self.reg_max, 1, 1]), requires_grad=False)
 
     def forward(self, cls_feats, reg_feats):
         all_anchors = []
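The refactor only moves the initialization into _reset_parameters: proj_conv is the frozen integral projection of the DFL-style box head, its 1x1 weights fixed to 0..reg_max-1 so that convolving the per-bin probabilities yields the expected bin index. A standalone sketch of what that projection computes, assuming reg_max = 16 and a softmax over the bin dimension (both assumptions, not shown in this hunk):

import torch
import torch.nn as nn
import torch.nn.functional as F

reg_max = 16  # assumed value

# Frozen 1x1 conv whose weights are simply the bin indices 0..reg_max-1.
proj_conv = nn.Conv2d(reg_max, 1, kernel_size=1, bias=False).requires_grad_(False)
proj_conv.weight.data[:] = torch.arange(reg_max, dtype=torch.float).view(1, reg_max, 1, 1)

# Fake per-bin logits for one box side: [B, reg_max, H, W].
logits = torch.randn(1, reg_max, 20, 20)
probs = F.softmax(logits, dim=1)      # distribution over the reg_max bins
expected_bin = proj_conv(probs)       # [B, 1, H, W] = sum_i i * p_i, the distribution's mean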

+ 0 - 15
models/detectors/yolox/README.md

@@ -12,21 +12,6 @@
 - For optimizer, we use SGD with weight decay 0.0005 and base per image lr 0.01 / 64.
 - For learning rate scheduler, we use Cosine decay scheduler.
 
-On the other hand, we are trying to use **AdamW** to train our reproduced YOLOX. We will update the new results as soon as possible.
-
-|   Model | Batch | Scale | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
-|---------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
-| YOLOX-N | 8xb16 |  640  |                        |                   |                   |                    |  |
-| YOLOX-S | 8xb16 |  640  |                        |                   |                   |                    |  |
-| YOLOX-M | 8xb16 |  640  |                        |                   |                   |                    |  |
-| YOLOX-L | 8xb16 |  640  |                        |                   |                   |                    |  |
-| YOLOX-X | 8xb16 |  640  |                        |                   |                   |                    |  |
-
-- For training, we train YOLOX series with 300 epochs on COCO.
-- For data augmentation, we use the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation.
-- For optimizer, we use AdamW with weight decay 0.05 and base per image lr 0.001 / 64,.
-- For learning rate scheduler, we use linear decay scheduler.
-
 ## Train YOLOX
 ### Single GPU
 Taking training YOLOX-S on COCO as the example,

+ 9 - 32
test.py

@@ -70,10 +70,6 @@ def parse_args():
     parser.add_argument('--load_cache', action='store_true', default=False,
                         help='load data into memory.')
 
-    # Task setting
-    parser.add_argument('-t', '--task', default='det', choices=['det', 'det_seg', 'det_pos', 'det_seg_pos'],
-                        help='task type.')
-
     return parser.parse_args()
 
 
@@ -131,18 +127,6 @@ def test_det(args,
             # save result
             cv2.imwrite(os.path.join(save_path, str(index).zfill(6) +'.jpg'), img_processed)
 
-@torch.no_grad()
-def test_det_seg():
-    pass
-
-@torch.no_grad()
-def test_det_pos():
-    pass
-
-@torch.no_grad()
-def test_det_seg_pos():
-    pass
-
 
 if __name__ == '__main__':
     args = parse_args()
@@ -199,19 +183,12 @@ if __name__ == '__main__':
         
     print("================= DETECT =================")
     # run
-    if args.task == "det":
-        test_det(args=args,
-                model=model, 
-                device=device, 
-                dataset=dataset,
-                transform=val_transform,
-                class_colors=class_colors,
-                class_names=dataset_info['class_names'],
-                class_indexs=dataset_info['class_indexs'],
-                )
-    elif args.task == "det_seg":
-        test_det_seg()
-    elif args.task == "det_pos":
-        test_det_pos()
-    elif args.task == "det_seg_pos":
-        test_det_seg_pos()
+    test_det(args=args,
+             model=model, 
+             device=device, 
+             dataset=dataset,
+             transform=val_transform,
+             class_colors=class_colors,
+             class_names=dataset_info['class_names'],
+             class_indexs=dataset_info['class_indexs'],
+             )