Ver Fonte

add YOLOv5 series

yjh0410 há 2 anos atrás
pai
commit
412bcc68b3

+ 6 - 3
README.md

@@ -103,9 +103,12 @@ python train.py --cuda -d coco --root path/to/COCO -v yolov1 -bs 16 --max_epoch
 | YOLOv1        | ResNet-18          |  640  |  √   |  150  |       |        27.9            |       47.5        | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov1_coco.pth) |
 | YOLOv2        | DarkNet-19         |  640  |  √   |  150  |       |        32.7            |       50.9        | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov2_coco.pth) |
 | YOLOv3        | DarkNet-53         |  640  |  √   |  250  |       |        42.9            |       63.5        | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov3_coco.pth) |
-| YOLOv4        | CSPDarkNet-L       |  640  |  √   |  250  |       |        46.6            |       65.8        | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov4_coco.pth) |
-| YOLOv5        | CSPDarkNet-53      |  640  |  √   |  250  |       |                        |                   |  |
-| YOLOX         | CSPDarkNet-L       |  640  |  √   |  300  |       |        46.6            |       66.1        | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolox_coco.pth) |
+| YOLOv4        | CSPDarkNet-53      |  640  |  √   |  250  |       |        46.6            |       65.8        | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov4_coco.pth) |
+| YOLOv5-N      | CSPDarkNet-N       |  640  |  √   |  250  |       |                        |                   |  |
+| YOLOv5-S      | CSPDarkNet-S       |  640  |  √   |  250  |       |                        |                   |  |
+| YOLOv5-M      | CSPDarkNet-M       |  640  |  √   |  250  |       |                        |                   |  |
+| YOLOv5-L      | CSPDarkNet-L       |  640  |  √   |  250  |       |                        |                   |  |
+| YOLOX-L       | CSPDarkNet-L       |  640  |  √   |  300  |       |        46.6            |       66.1        | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolox_coco.pth) |
 | YOLOv7-Tiny   | ELANNet-Tiny       |  640  |  √   |  300  |       |                    |               |  |
 | YOLOv7-Large  | ELANNet-Large      |  640  |  √   |  300  |       |                    |               |  |
 

+ 2 - 2
config/__init__.py

@@ -25,8 +25,8 @@ def build_model_config(args):
     elif args.model == 'yolov4':
         cfg = yolov4_cfg
     # YOLOv5
-    elif args.model == 'yolov5':
-        cfg = yolov5_cfg
+    elif args.model in ['yolov5_nano', 'yolov5_small', 'yolov5_medium', 'yolov5_large', 'yolov5_huge']:
+        cfg = yolov5_cfg[args.model]
     # YOLOv7
     elif args.model in ['yolov7_nano', 'yolov7_tiny', 'yolov7_large', 'yolov7_huge']:
         cfg = yolov7_cfg[args.model]

+ 260 - 49
config/yolov5_config.py

@@ -1,53 +1,264 @@
 # YOLOv5 Config
 
 yolov5_cfg = {
-    # input
-    'trans_type': 'yolov5_strong',
-    'multi_scale': [0.5, 1.0],
-    # model
-    'backbone': 'cspdarknet',
-    'pretrained': True,
-    'bk_act': 'silu',
-    'bk_norm': 'BN',
-    'bk_dpw': False,
-    'stride': [8, 16, 32],  # P3, P4, P5
-    'width': 1.0,
-    'depth': 1.0,
-     # fpn
-    'fpn': 'yolo_pafpn',
-    'fpn_act': 'silu',
-    'fpn_norm': 'BN',
-    'fpn_depthwise': False,
-    # head
-    'head': 'decoupled_head',
-    'head_act': 'silu',
-    'head_norm': 'BN',
-    'num_cls_head': 2,
-    'num_reg_head': 2,
-    'head_depthwise': False,
-    'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                    [30, 61],   [62, 45],   [59, 119],    # P4
-                    [116, 90],  [156, 198], [373, 326]],  # P5
-    # matcher
-    'anchor_thresh': 4.0,
-    # loss weight
-    'loss_obj_weight': 1.0,
-    'loss_cls_weight': 1.0,
-    'loss_box_weight': 5.0,
-    # training configuration
-    'no_aug_epoch': 10,
-    # optimizer
-    'optimizer': 'sgd',        # optional: sgd, adam, adamw
-    'momentum': 0.937,         # SGD: 0.937;    AdamW: invalid
-    'weight_decay': 5e-4,      # SGD: 5e-4;     AdamW: 5e-2
-    'clip_grad': 10,           # SGD: 10.0;     AdamW: -1
-    # model EMA
-    'ema_decay': 0.9999,       # SGD: 0.9999;   AdamW: 0.9998
-    'ema_tau': 2000,
-    # lr schedule
-    'scheduler': 'linear',
-    'lr0': 0.01,               # SGD: 0.01;     AdamW: 0.004
-    'lrf': 0.01,               # SGD: 0.01;     AdamW: 0.05
-    'warmup_momentum': 0.8,
-    'warmup_bias_lr': 0.1,
+    'yolov5_nano':{
+        # input
+        'trans_type': 'yolov5_weak',
+        'multi_scale': [0.5, 1.0],
+        # model
+        'backbone': 'cspdarknet',
+        'pretrained': True,
+        'bk_act': 'silu',
+        'bk_norm': 'BN',
+        'bk_dpw': False,
+        'stride': [8, 16, 32],  # P3, P4, P5
+        'width': 0.25,
+        'depth': 0.34,
+        # fpn
+        'fpn': 'yolo_pafpn',
+        'fpn_act': 'silu',
+        'fpn_norm': 'BN',
+        'fpn_depthwise': False,
+        # head
+        'head': 'decoupled_head',
+        'head_act': 'silu',
+        'head_norm': 'BN',
+        'num_cls_head': 2,
+        'num_reg_head': 2,
+        'head_depthwise': False,
+        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
+                        [30, 61],   [62, 45],   [59, 119],    # P4
+                        [116, 90],  [156, 198], [373, 326]],  # P5
+        # matcher
+        'anchor_thresh': 4.0,
+        # loss weight
+        'loss_obj_weight': 1.0,
+        'loss_cls_weight': 1.0,
+        'loss_box_weight': 5.0,
+        # training configuration
+        'no_aug_epoch': 10,
+        # optimizer
+        'optimizer': 'sgd',        # optional: sgd, adam, adamw
+        'momentum': 0.937,         # SGD: 0.937;    AdamW: invalid
+        'weight_decay': 5e-4,      # SGD: 5e-4;     AdamW: 5e-2
+        'clip_grad': 10,           # SGD: 10.0;     AdamW: -1
+        # model EMA
+        'ema_decay': 0.9999,       # SGD: 0.9999;   AdamW: 0.9998
+        'ema_tau': 2000,
+        # lr schedule
+        'scheduler': 'linear',
+        'lr0': 0.01,               # SGD: 0.01;     AdamW: 0.004
+        'lrf': 0.01,               # SGD: 0.01;     AdamW: 0.05
+        'warmup_momentum': 0.8,
+        'warmup_bias_lr': 0.1,
+    },
+
+    'yolov5_small':{
+        # input
+        'trans_type': 'yolov5_weak',
+        'multi_scale': [0.5, 1.0],
+        # model
+        'backbone': 'cspdarknet',
+        'pretrained': True,
+        'bk_act': 'silu',
+        'bk_norm': 'BN',
+        'bk_dpw': False,
+        'stride': [8, 16, 32],  # P3, P4, P5
+        'width': 0.50,
+        'depth': 0.34,
+        # fpn
+        'fpn': 'yolo_pafpn',
+        'fpn_act': 'silu',
+        'fpn_norm': 'BN',
+        'fpn_depthwise': False,
+        # head
+        'head': 'decoupled_head',
+        'head_act': 'silu',
+        'head_norm': 'BN',
+        'num_cls_head': 2,
+        'num_reg_head': 2,
+        'head_depthwise': False,
+        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
+                        [30, 61],   [62, 45],   [59, 119],    # P4
+                        [116, 90],  [156, 198], [373, 326]],  # P5
+        # matcher
+        'anchor_thresh': 4.0,
+        # loss weight
+        'loss_obj_weight': 1.0,
+        'loss_cls_weight': 1.0,
+        'loss_box_weight': 5.0,
+        # training configuration
+        'no_aug_epoch': 10,
+        # optimizer
+        'optimizer': 'sgd',        # optional: sgd, adam, adamw
+        'momentum': 0.937,         # SGD: 0.937;    AdamW: invalid
+        'weight_decay': 5e-4,      # SGD: 5e-4;     AdamW: 5e-2
+        'clip_grad': 10,           # SGD: 10.0;     AdamW: -1
+        # model EMA
+        'ema_decay': 0.9999,       # SGD: 0.9999;   AdamW: 0.9998
+        'ema_tau': 2000,
+        # lr schedule
+        'scheduler': 'linear',
+        'lr0': 0.01,               # SGD: 0.01;     AdamW: 0.004
+        'lrf': 0.01,               # SGD: 0.01;     AdamW: 0.05
+        'warmup_momentum': 0.8,
+        'warmup_bias_lr': 0.1,
+    },
+
+    'yolov5_medium':{
+        # input
+        'trans_type': 'yolov5_strong',
+        'multi_scale': [0.5, 1.0],
+        # model
+        'backbone': 'cspdarknet',
+        'pretrained': True,
+        'bk_act': 'silu',
+        'bk_norm': 'BN',
+        'bk_dpw': False,
+        'stride': [8, 16, 32],  # P3, P4, P5
+        'width': 0.75,
+        'depth': 0.67,
+        # fpn
+        'fpn': 'yolo_pafpn',
+        'fpn_act': 'silu',
+        'fpn_norm': 'BN',
+        'fpn_depthwise': False,
+        # head
+        'head': 'decoupled_head',
+        'head_act': 'silu',
+        'head_norm': 'BN',
+        'num_cls_head': 2,
+        'num_reg_head': 2,
+        'head_depthwise': False,
+        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
+                        [30, 61],   [62, 45],   [59, 119],    # P4
+                        [116, 90],  [156, 198], [373, 326]],  # P5
+        # matcher
+        'anchor_thresh': 4.0,
+        # loss weight
+        'loss_obj_weight': 1.0,
+        'loss_cls_weight': 1.0,
+        'loss_box_weight': 5.0,
+        # training configuration
+        'no_aug_epoch': 10,
+        # optimizer
+        'optimizer': 'sgd',        # optional: sgd, adam, adamw
+        'momentum': 0.937,         # SGD: 0.937;    AdamW: invalid
+        'weight_decay': 5e-4,      # SGD: 5e-4;     AdamW: 5e-2
+        'clip_grad': 10,           # SGD: 10.0;     AdamW: -1
+        # model EMA
+        'ema_decay': 0.9999,       # SGD: 0.9999;   AdamW: 0.9998
+        'ema_tau': 2000,
+        # lr schedule
+        'scheduler': 'linear',
+        'lr0': 0.01,               # SGD: 0.01;     AdamW: 0.004
+        'lrf': 0.01,               # SGD: 0.01;     AdamW: 0.05
+        'warmup_momentum': 0.8,
+        'warmup_bias_lr': 0.1,
+    },
+
+    'yolov5_large':{
+        # input
+        'trans_type': 'yolov5_strong',
+        'multi_scale': [0.5, 1.0],
+        # model
+        'backbone': 'cspdarknet',
+        'pretrained': True,
+        'bk_act': 'silu',
+        'bk_norm': 'BN',
+        'bk_dpw': False,
+        'stride': [8, 16, 32],  # P3, P4, P5
+        'width': 1.0,
+        'depth': 1.0,
+        # fpn
+        'fpn': 'yolo_pafpn',
+        'fpn_act': 'silu',
+        'fpn_norm': 'BN',
+        'fpn_depthwise': False,
+        # head
+        'head': 'decoupled_head',
+        'head_act': 'silu',
+        'head_norm': 'BN',
+        'num_cls_head': 2,
+        'num_reg_head': 2,
+        'head_depthwise': False,
+        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
+                        [30, 61],   [62, 45],   [59, 119],    # P4
+                        [116, 90],  [156, 198], [373, 326]],  # P5
+        # matcher
+        'anchor_thresh': 4.0,
+        # loss weight
+        'loss_obj_weight': 1.0,
+        'loss_cls_weight': 1.0,
+        'loss_box_weight': 5.0,
+        # training configuration
+        'no_aug_epoch': 10,
+        # optimizer
+        'optimizer': 'sgd',        # optional: sgd, adam, adamw
+        'momentum': 0.937,         # SGD: 0.937;    AdamW: invalid
+        'weight_decay': 5e-4,      # SGD: 5e-4;     AdamW: 5e-2
+        'clip_grad': 10,           # SGD: 10.0;     AdamW: -1
+        # model EMA
+        'ema_decay': 0.9999,       # SGD: 0.9999;   AdamW: 0.9998
+        'ema_tau': 2000,
+        # lr schedule
+        'scheduler': 'linear',
+        'lr0': 0.01,               # SGD: 0.01;     AdamW: 0.004
+        'lrf': 0.01,               # SGD: 0.01;     AdamW: 0.05
+        'warmup_momentum': 0.8,
+        'warmup_bias_lr': 0.1,
+    },
+
+    'yolov5_huge':{
+        # input
+        'trans_type': 'yolov5_strong',
+        'multi_scale': [0.5, 1.0],
+        # model
+        'backbone': 'cspdarknet',
+        'pretrained': True,
+        'bk_act': 'silu',
+        'bk_norm': 'BN',
+        'bk_dpw': False,
+        'stride': [8, 16, 32],  # P3, P4, P5
+        'width': 1.25,
+        'depth': 1.34,
+        # fpn
+        'fpn': 'yolo_pafpn',
+        'fpn_act': 'silu',
+        'fpn_norm': 'BN',
+        'fpn_depthwise': False,
+        # head
+        'head': 'decoupled_head',
+        'head_act': 'silu',
+        'head_norm': 'BN',
+        'num_cls_head': 2,
+        'num_reg_head': 2,
+        'head_depthwise': False,
+        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
+                        [30, 61],   [62, 45],   [59, 119],    # P4
+                        [116, 90],  [156, 198], [373, 326]],  # P5
+        # matcher
+        'anchor_thresh': 4.0,
+        # loss weight
+        'loss_obj_weight': 1.0,
+        'loss_cls_weight': 1.0,
+        'loss_box_weight': 5.0,
+        # training configuration
+        'no_aug_epoch': 10,
+        # optimizer
+        'optimizer': 'sgd',        # optional: sgd, adam, adamw
+        'momentum': 0.937,         # SGD: 0.937;    AdamW: invalid
+        'weight_decay': 5e-4,      # SGD: 5e-4;     AdamW: 5e-2
+        'clip_grad': 10,           # SGD: 10.0;     AdamW: -1
+        # model EMA
+        'ema_decay': 0.9999,       # SGD: 0.9999;   AdamW: 0.9998
+        'ema_tau': 2000,
+        # lr schedule
+        'scheduler': 'linear',
+        'lr0': 0.01,               # SGD: 0.01;     AdamW: 0.004
+        'lrf': 0.01,               # SGD: 0.01;     AdamW: 0.05
+        'warmup_momentum': 0.8,
+        'warmup_bias_lr': 0.1,
+    },
+
 }

+ 1 - 1
models/__init__.py

@@ -35,7 +35,7 @@ def build_model(args,
         model, criterion = build_yolov4(
             args, model_cfg, device, num_classes, trainable)
     # YOLOv5   
-    elif args.model == 'yolov5':
+    elif args.model in ['yolov5_nano', 'yolov5_small', 'yolov5_medium', 'yolov5_large', 'yolov5_huge']:
         model, criterion = build_yolov5(
             args, model_cfg, device, num_classes, trainable)
     # YOLOv7

+ 46 - 24
models/yolov5/yolov5_backbone.py

@@ -9,7 +9,11 @@ except:
     from yolov5_neck import SPPF
 
 model_urls = {
+    # Pretrained CSPDarkNet backbone checkpoints, keyed by backbone size.
+    # A value of None means no checkpoint URL is registered for that size.
+    "cspdarknet_nano": None,
+    "cspdarknet_small": None,
+    "cspdarknet_medium": None,
     "cspdarknet_large": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/cspdarknet_large.pth",
+    "cspdarknet_huge": None,
 }
 
 # CSPDarkNet
@@ -61,6 +65,37 @@ class CSPDarkNet(nn.Module):
 
 
 # ---------------------------- Functions ----------------------------
+## load pretrained weight
+def load_weight(model, model_name):
+    """Load the pretrained checkpoint registered for `model_name` into `model`.
+
+    Args:
+        model: object exposing `state_dict()` and `load_state_dict()`.
+        model_name: key looked up in the module-level `model_urls` table.
+
+    Returns:
+        The same `model` object; its weights are replaced only when a
+        checkpoint URL is registered for `model_name`.
+    """
+    # Use .get() so a name missing from model_urls is handled like a None
+    # entry (no checkpoint available) instead of raising KeyError.
+    url = model_urls.get(model_name)
+    if url is None:
+        print('No pretrained for {}'.format(model_name))
+        return model
+
+    # Only announce loading once we know there is something to download.
+    print('Loading pretrained weight ...')
+    checkpoint = torch.hub.load_state_dict_from_url(
+        url=url, map_location="cpu", check_hash=True)
+    # checkpoint state dict
+    checkpoint_state_dict = checkpoint.pop("model")
+    # model state dict
+    model_state_dict = model.state_dict()
+    # Drop checkpoint entries the model cannot accept.
+    for k in list(checkpoint_state_dict.keys()):
+        if k in model_state_dict:
+            shape_model = tuple(model_state_dict[k].shape)
+            shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
+            if shape_model != shape_checkpoint:
+                checkpoint_state_dict.pop(k)
+        else:
+            # Key unknown to the model: discard it and print its name.
+            checkpoint_state_dict.pop(k)
+            print(k)
+
+    # NOTE(review): load_state_dict is strict by default, so a key dropped
+    # above for a shape mismatch would be reported here as missing --
+    # confirm whether strict=False is the intended behavior.
+    model.load_state_dict(checkpoint_state_dict)
+
+    return model
+
+
+## build CSPDarkNet
 def build_backbone(cfg, pretrained=False): 
     """Constructs a darknet-53 model.
     Args:
@@ -69,31 +104,18 @@ def build_backbone(cfg, pretrained=False):
     backbone = CSPDarkNet(cfg['depth'], cfg['width'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
     feat_dims = backbone.feat_dims
 
+    # check whether to load imagenet pretrained weight
     if pretrained:
-        if cfg['width'] == 1.0 and cfg['depth'] == 1.0:
-            url = model_urls['cspdarknet_large']
-        if url is not None:
-            print('Loading pretrained weight ...')
-            checkpoint = torch.hub.load_state_dict_from_url(
-                url=url, map_location="cpu", check_hash=True)
-            # checkpoint state dict
-            checkpoint_state_dict = checkpoint.pop("model")
-            # model state dict
-            model_state_dict = backbone.state_dict()
-            # check
-            for k in list(checkpoint_state_dict.keys()):
-                if k in model_state_dict:
-                    shape_model = tuple(model_state_dict[k].shape)
-                    shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
-                    if shape_model != shape_checkpoint:
-                        checkpoint_state_dict.pop(k)
-                else:
-                    checkpoint_state_dict.pop(k)
-                    print(k)
-
-            backbone.load_state_dict(checkpoint_state_dict)
-        else:
-            print('No backbone pretrained: CSPDarkNet53')        
+        if cfg['width'] == 0.25 and cfg['depth'] == 0.34:
+            backbone = load_weight(backbone, model_name='cspdarknet_nano')
+        elif cfg['width'] == 0.5 and cfg['depth'] == 0.34:
+            backbone = load_weight(backbone, model_name='cspdarknet_small')
+        elif cfg['width'] == 0.75 and cfg['depth'] == 0.67:
+            backbone = load_weight(backbone, model_name='cspdarknet_medium')
+        elif cfg['width'] == 1.0 and cfg['depth'] == 1.0:
+            backbone = load_weight(backbone, model_name='cspdarknet_large')
+        elif cfg['width'] == 1.25 and cfg['depth'] == 1.34:
+            backbone = load_weight(backbone, model_name='cspdarknet_huge')
 
     return backbone, feat_dims
 

+ 1 - 1
models/yolov8/yolov8.py

@@ -37,7 +37,7 @@ class YOLOv8(nn.Module):
         self.proj_conv = nn.Conv2d(self.reg_max, 1, kernel_size=1, bias=False)
 
         ## backbone
-        self.backbone, feats_dim = build_backbone(cfg=cfg)
+        self.backbone, feats_dim = build_backbone(cfg, cfg['pretrained']*trainable)
 
         ## neck
         self.neck = build_neck(cfg=cfg, in_dim=feats_dim[-1], out_dim=feats_dim[-1])

+ 3 - 3
models/yolov8/yolov8_backbone.py

@@ -97,7 +97,7 @@ def load_weight(model, model_name):
 
 
 ## build ELAN-Net
-def build_backbone(cfg): 
+def build_backbone(cfg, pretrained=False): 
     # model
     backbone = ELAN_CSPNet(
         width=cfg['width'],
@@ -110,7 +110,7 @@ def build_backbone(cfg):
     feat_dims = backbone.feat_dims
         
     # check whether to load imagenet pretrained weight
-    if cfg['pretrained']:
+    if pretrained:
         if cfg['width'] == 0.25 and cfg['depth'] == 0.34 and cfg['ratio'] == 2.0:
             backbone = load_weight(backbone, model_name='elan_cspnet_nano')
         elif cfg['width'] == 0.5 and cfg['depth'] == 0.34 and cfg['ratio'] == 2.0:
@@ -119,7 +119,7 @@ def build_backbone(cfg):
             backbone = load_weight(backbone, model_name='elan_cspnet_medium')
         elif cfg['width'] == 1.0 and cfg['depth'] == 1.0 and cfg['ratio'] == 1.0:
             backbone = load_weight(backbone, model_name='elan_cspnet_large')
-        elif cfg['width'] == 1.25 and cfg['depth'] == 1.34 and cfg['ratio'] == 1.0:
+        elif cfg['width'] == 1.25 and cfg['depth'] == 1.0 and cfg['ratio'] == 1.0:
             backbone = load_weight(backbone, model_name='elan_cspnet_huge')
 
     return backbone, feat_dims