Bläddra i källkod

add YOLOv4-Tiny

yjh0410 2 år sedan
förälder
incheckning
7127f065b3

+ 27 - 5
README.md

@@ -97,19 +97,28 @@ For example:
 ```Shell
 python train.py --cuda -d coco --root path/to/COCO -v yolov1 -bs 16 --max_epoch 150 --wp_epoch 1 --eval_epoch 10 --fp16 --ema --multi_scale
 ```
-* Detectors reproduced in my book
+* Redesigned YOLOv1~v2:
 
 | Model         |   Backbone         | Scale | Epoch |  FPS  | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
 |---------------|--------------------|-------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
 | YOLOv1        | ResNet-18          |  640  |  150  |       |        27.9            |       47.5        |   37.8            |   21.3             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov1_coco.pth) |
 | YOLOv2        | DarkNet-19         |  640  |  150  |       |        32.7            |       50.9        |   53.9            |   30.9             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov2_coco.pth) |
+
+* YOLOv3:
+
+| Model         |   Backbone         | Scale | Epoch |  FPS  | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
+|---------------|--------------------|-------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
+| YOLOv3-Tiny   | DarkNet-Tiny       |  640  |  250  |       |                        |                   |                   |                    |  |
 | YOLOv3        | DarkNet-53         |  640  |  250  |       |        42.9            |       63.5        |   167.4           |   54.9             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov3_coco.pth) |
+
+* YOLOv4:
+
+| Model         |   Backbone         | Scale | Epoch |  FPS  | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
+|---------------|--------------------|-------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
+| YOLOv4-Tiny   | CSPDarkNet-Tiny    |  640  |  250  |       |                        |                   |                   |                    |  |
 | YOLOv4        | CSPDarkNet-53      |  640  |  250  |       |        46.6            |       65.8        |   162.7           |   61.5             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolov4_coco.pth) |
-| YOLOX-L       | CSPDarkNet-L       |  640  |  300  |       |        46.6            |       66.1        |   155.4           |   54.2             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolox_coco.pth) |
-| YOLOv7-T      | ELANNet-Tiny       |  640  |  300  |       |                        |                   |   22.9            |   8.1              |  |
-| YOLOv7-L      | ELANNet-Large      |  640  |  300  |       |                        |                   |   144.6           |   44.0             |  |
 
-* Reproduced YOLOv5:
+* YOLOv5:
 
 | Model         |   Backbone         | Scale | Epoch |  FPS  | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
 |---------------|--------------------|-------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
@@ -118,6 +127,19 @@ python train.py --cuda -d coco --root path/to/COCO -v yolov1 -bs 16 --max_epoch
 | YOLOv5-M      | CSPDarkNet-M       |  640  |  250  |       |                        |                   |   74.3            |   25.4             |  |
 | YOLOv5-L      | CSPDarkNet-L       |  640  |  250  |       |                        |                   |   155.6           |   54.2             |  |
 
+* YOLOX:
+
+| Model         |   Backbone         | Scale | Epoch |  FPS  | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
+|---------------|--------------------|-------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
+| YOLOX-L       | CSPDarkNet-L       |  640  |  300  |       |        46.6            |       66.1        |   155.4           |   54.2             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolox_coco.pth) |
+
+* YOLOv7:
+
+| Model         |   Backbone         | Scale | Epoch |  FPS  | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
+|---------------|--------------------|-------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
+| YOLOv7-T      | ELANNet-Tiny       |  640  |  300  |       |                        |                   |   22.9            |   8.1              |  |
+| YOLOv7-L      | ELANNet-Large      |  640  |  300  |       |                        |                   |   144.6           |   44.0             |  |
+
 - *All models are trained with ImageNet-pretrained weights (IP). All FLOPs are measured with a 640x640 input image on COCO val2017. The FPS is measured with batch size 1 on a 3090 GPU, covering everything from model inference through the NMS operation.*
 
 - *The reproduced YOLOv5 uses a **Decoupled Head**, which is why its FLOPs and Params are higher than those of the official YOLOv5. Due to my limited computing resources, I cannot align the training configuration with the official YOLOv5, so I cannot fully replicate the official performance. The YOLOv5 reproduced here is for learning purposes only.*

+ 2 - 2
config/__init__.py

@@ -22,8 +22,8 @@ def build_model_config(args):
     elif args.model in ['yolov3', 'yolov3_t']:
         cfg = yolov3_cfg[args.model]
     # YOLOv4
-    elif args.model == 'yolov4':
-        cfg = yolov4_cfg
+    elif args.model in ['yolov4', 'yolov4_t']:
+        cfg = yolov4_cfg[args.model]
     # YOLOv5
     elif args.model in ['yolov5_n', 'yolov5_s', 'yolov5_m', 'yolov5_l', 'yolov5_x']:
         cfg = yolov5_cfg[args.model]

+ 2 - 2
config/yolov3_config.py

@@ -17,7 +17,7 @@ yolov3_cfg = {
         'neck_norm': 'BN',
         'neck_depthwise': False,
         ## FPN
-        'fpn': 'yolo_fpn',
+        'fpn': 'yolov3_fpn',
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
@@ -78,7 +78,7 @@ yolov3_cfg = {
         'neck_norm': 'BN',
         'neck_depthwise': False,
         ## FPN
-        'fpn': 'yolo_fpn',
+        'fpn': 'yolov3_fpn',
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,

+ 122 - 53
config/yolov4_config.py

@@ -1,57 +1,126 @@
 # YOLOv4 Config
 
 yolov4_cfg = {
-    # input
-    'trans_type': 'yolov5_large',
-    'multi_scale': [0.5, 1.0],
-    # model
-    'backbone': 'cspdarknet53',
-    'pretrained': True,
-    'stride': [8, 16, 32],  # P3, P4, P5
-    'width': 1.0,
-    'depth': 1.0,
-    # neck
-    'neck': 'csp_sppf',
-    'expand_ratio': 0.5,
-    'pooling_size': 5,
-    'neck_act': 'silu',
-    'neck_norm': 'BN',
-    'neck_depthwise': False,
-     # fpn
-    'fpn': 'yolo_pafpn',
-    'fpn_act': 'silu',
-    'fpn_norm': 'BN',
-    'fpn_depthwise': False,
-    # head
-    'head': 'decoupled_head',
-    'head_act': 'silu',
-    'head_norm': 'BN',
-    'num_cls_head': 2,
-    'num_reg_head': 2,
-    'head_depthwise': False,
-    'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
-                    [30, 61],   [62, 45],   [59, 119],    # P4
-                    [116, 90],  [156, 198], [373, 326]],  # P5
-    # matcher
-    'iou_thresh': 0.5,
-    # loss weight
-    'loss_obj_weight': 1.0,
-    'loss_cls_weight': 1.0,
-    'loss_box_weight': 5.0,
-    # training configuration
-    'no_aug_epoch': 10,
-    # optimizer
-    'optimizer': 'sgd',        # optional: sgd, adam, adamw
-    'momentum': 0.937,         # SGD: 0.937;    AdamW: invalid
-    'weight_decay': 5e-4,      # SGD: 5e-4;     AdamW: 5e-2
-    'clip_grad': 10,           # SGD: 10.0;     AdamW: -1
-    # model EMA
-    'ema_decay': 0.9999,       # SGD: 0.9999;   AdamW: 0.9998
-    'ema_tau': 2000,
-    # lr schedule
-    'scheduler': 'linear',
-    'lr0': 0.01,               # SGD: 0.01;     AdamW: 0.004
-    'lrf': 0.01,               # SGD: 0.01;     AdamW: 0.05
-    'warmup_momentum': 0.8,
-    'warmup_bias_lr': 0.1,
+    'yolov4':{
+        # ---------------- Model config ----------------
+        ## Backbone
+        'backbone': 'cspdarknet53',
+        'pretrained': True,
+        'stride': [8, 16, 32],  # P3, P4, P5
+        'width': 1.0,
+        'depth': 1.0,
+        ## Neck
+        'neck': 'csp_sppf',
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
+        'neck_act': 'silu',
+        'neck_norm': 'BN',
+        'neck_depthwise': False,
+        ## FPN
+        'fpn': 'yolov4_pafpn',
+        'fpn_act': 'silu',
+        'fpn_norm': 'BN',
+        'fpn_depthwise': False,
+        ## Head
+        'head': 'decoupled_head',
+        'head_act': 'silu',
+        'head_norm': 'BN',
+        'num_cls_head': 2,
+        'num_reg_head': 2,
+        'head_depthwise': False,
+        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
+                        [30, 61],   [62, 45],   [59, 119],    # P4
+                        [116, 90],  [156, 198], [373, 326]],  # P5
+        # ---------------- Train config ----------------
+        ## input
+        'trans_type': 'yolov5_large',
+        'multi_scale': [0.5, 1.0],
+        # ---------------- Assignment config ----------------
+        ## matcher
+        'iou_thresh': 0.5,
+        # ---------------- Loss config ----------------
+        ## loss weight
+        'loss_obj_weight': 1.0,
+        'loss_cls_weight': 1.0,
+        'loss_box_weight': 5.0,
+        # ---------------- Train config ----------------
+        ## close strong augmentation
+        'no_aug_epoch': 10,
+        ## optimizer
+        'optimizer': 'sgd',        # optional: sgd, AdamW
+        'momentum': 0.937,         # SGD: 0.937;    AdamW: None
+        'weight_decay': 5e-4,      # SGD: 5e-4;     AdamW: 5e-2
+        'clip_grad': 10,           # SGD: 10.0;     AdamW: -1
+        ## model EMA
+        'ema_decay': 0.9999,       # SGD: 0.9999;   AdamW: 0.9998
+        'ema_tau': 2000,
+        ## lr schedule
+        'scheduler': 'linear',
+        'lr0': 0.01,              # SGD: 0.01;     AdamW: 0.001
+        'lrf': 0.01,               # SGD: 0.01;     AdamW: 0.01
+        'warmup_momentum': 0.8,
+        'warmup_bias_lr': 0.1,
+    },
+
+    'yolov4_t':{
+        # ---------------- Model config ----------------
+        ## Backbone
+        'backbone': 'cspdarknet_tiny',
+        'pretrained': True,
+        'stride': [8, 16, 32],  # P3, P4, P5
+        'width': 0.25,
+        'depth': 0.34,
+        ## Neck
+        'neck': 'csp_sppf',
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
+        'neck_act': 'silu',
+        'neck_norm': 'BN',
+        'neck_depthwise': False,
+        ## FPN
+        'fpn': 'yolov4_pafpn',
+        'fpn_act': 'silu',
+        'fpn_norm': 'BN',
+        'fpn_depthwise': False,
+        ## Head
+        'head': 'decoupled_head',
+        'head_act': 'silu',
+        'head_norm': 'BN',
+        'num_cls_head': 2,
+        'num_reg_head': 2,
+        'head_depthwise': False,
+        'anchor_size': [[10, 13],   [16, 30],   [33, 23],     # P3
+                        [30, 61],   [62, 45],   [59, 119],    # P4
+                        [116, 90],  [156, 198], [373, 326]],  # P5
+        # ---------------- Train config ----------------
+        ## input
+        'trans_type': 'yolov5_tiny',
+        'multi_scale': [0.5, 1.0],
+        # ---------------- Assignment config ----------------
+        ## matcher
+        'iou_thresh': 0.5,
+        # ---------------- Loss config ----------------
+        ## loss weight
+        'loss_obj_weight': 1.0,
+        'loss_cls_weight': 1.0,
+        'loss_box_weight': 5.0,
+        # ---------------- Train config ----------------
+        ## close strong augmentation
+        'no_aug_epoch': 10,
+        ## optimizer
+        'optimizer': 'sgd',        # optional: sgd, AdamW
+        'momentum': 0.937,         # SGD: 0.937;    AdamW: None
+        'weight_decay': 5e-4,      # SGD: 5e-4;     AdamW: 5e-2
+        'clip_grad': 10,           # SGD: 10.0;     AdamW: -1
+        ## model EMA
+        'ema_decay': 0.9999,       # SGD: 0.9999;   AdamW: 0.9998
+        'ema_tau': 2000,
+        ## lr schedule
+        'scheduler': 'linear',
+        'lr0': 0.01,              # SGD: 0.01;     AdamW: 0.001
+        'lrf': 0.01,               # SGD: 0.01;     AdamW: 0.01
+        'warmup_momentum': 0.8,
+        'warmup_bias_lr': 0.1,
+    },
+
 }

+ 1 - 1
models/detectors/__init__.py

@@ -31,7 +31,7 @@ def build_model(args,
         model, criterion = build_yolov3(
             args, model_cfg, device, num_classes, trainable)
     # YOLOv4   
-    elif args.model == 'yolov4':
+    elif args.model in ['yolov4', 'yolov4_t']:
         model, criterion = build_yolov4(
             args, model_cfg, device, num_classes, trainable)
     # YOLOv5   

+ 5 - 5
models/detectors/yolov3/yolov3_fpn.py

@@ -5,8 +5,8 @@ import torch.nn.functional as F
 from .yolov3_basic import Conv, ConvBlocks
 
 
-# YoloFPN
-class YoloFPN(nn.Module):
+# Yolov3FPN
+class Yolov3FPN(nn.Module):
     def __init__(self,
                  in_dims=[256, 512, 1024],
                  width=1.0,
@@ -14,7 +14,7 @@ class YoloFPN(nn.Module):
                  out_dim=None,
                  act_type='silu',
                  norm_type='BN'):
-        super(YoloFPN, self).__init__()
+        super(Yolov3FPN, self).__init__()
         self.in_dims = in_dims
         self.out_dim = out_dim
         c3, c4, c5 = in_dims
@@ -75,8 +75,8 @@ class YoloFPN(nn.Module):
 def build_fpn(cfg, in_dims, out_dim=None):
     model = cfg['fpn']
     # build neck
-    if model == 'yolo_fpn':
-        fpn_net = YoloFPN(in_dims=in_dims,
+    if model == 'yolov3_fpn':
+        fpn_net = Yolov3FPN(in_dims=in_dims,
                             out_dim=out_dim,
                             width=cfg['width'],
                             depth=cfg['depth'],

+ 51 - 2
models/detectors/yolov4/yolov4_backbone.py

@@ -8,10 +8,12 @@ except:
     
 
 model_urls = {
+    "cspdarknet_tiny": None,
     "cspdarknet53": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/cspdarknet53_silu.pth",
 }
 
 # --------------------- CSPDarkNet-53 -----------------------
+## CSPDarkNet-53
 class CSPDarkNet53(nn.Module):
     def __init__(self, act_type='silu', norm_type='BN'):
         super(CSPDarkNet53, self).__init__()
@@ -56,6 +58,50 @@ class CSPDarkNet53(nn.Module):
 
         return outputs
 
+## CSPDarkNet-Tiny
+class CSPDarkNetTiny(nn.Module):
+    def __init__(self, act_type='silu', norm_type='BN'):
+        super(CSPDarkNetTiny, self).__init__()
+        self.feat_dims = [64, 128, 256]
+
+        # stride = 2
+        self.layer_1 = nn.Sequential(
+            Conv(3, 16, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
+            CSPBlock(16, 16, expand_ratio=0.5, nblocks=1, shortcut=True, act_type=act_type, norm_type=norm_type)
+        )
+        # stride = 4
+        self.layer_2 = nn.Sequential(
+            Conv(16, 32, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
+            CSPBlock(32, 32, expand_ratio=0.5, nblocks=1, shortcut=True, act_type=act_type, norm_type=norm_type)
+        )
+        # stride = 8
+        self.layer_3 = nn.Sequential(
+            Conv(32, 64, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
+            CSPBlock(64, 64, expand_ratio=0.5, nblocks=3, shortcut=True, act_type=act_type, norm_type=norm_type)
+        )
+        # stride = 16
+        self.layer_4 = nn.Sequential(
+            Conv(64, 128, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
+            CSPBlock(128, 128, expand_ratio=0.5, nblocks=3, shortcut=True, act_type=act_type, norm_type=norm_type)
+        )
+        # stride = 32
+        self.layer_5 = nn.Sequential(
+            Conv(128, 256, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
+            CSPBlock(256, 256, expand_ratio=0.5, nblocks=2, shortcut=True, act_type=act_type, norm_type=norm_type)
+        )
+
+
+    def forward(self, x):
+        c1 = self.layer_1(x)
+        c2 = self.layer_2(c1)
+        c3 = self.layer_3(c2)
+        c4 = self.layer_4(c3)
+        c5 = self.layer_5(c4)
+
+        outputs = [c3, c4, c5]
+
+        return outputs
+
 
 # --------------------- Functions -----------------------
 def build_backbone(model_name='cspdarknet53', pretrained=False): 
@@ -66,9 +112,12 @@ def build_backbone(model_name='cspdarknet53', pretrained=False):
     if model_name == 'cspdarknet53':
         backbone = CSPDarkNet53(act_type='silu', norm_type='BN')
         feat_dims = backbone.feat_dims
+    elif model_name == 'cspdarknet_tiny':
+        backbone = CSPDarkNetTiny(act_type='silu', norm_type='BN')
+        feat_dims = backbone.feat_dims
 
     if pretrained:
-        url = model_urls['cspdarknet53']
+        url = model_urls[model_name]
         if url is not None:
             print('Loading pretrained weight ...')
             checkpoint = torch.hub.load_state_dict_from_url(
@@ -98,7 +147,7 @@ def build_backbone(model_name='cspdarknet53', pretrained=False):
 if __name__ == '__main__':
     import time
     from thop import profile
-    model, feats = build_backbone(pretrained=False)
+    model, feats = build_backbone(model_name='cspdarknet_tiny', pretrained=False)
     x = torch.randn(1, 3, 224, 224)
     t0 = time.time()
     outputs = model(x)

+ 4 - 4
models/detectors/yolov4/yolov4_fpn.py

@@ -5,7 +5,7 @@ from .yolov4_basic import Conv, CSPBlock
 
 
 # PaFPN-CSP
-class YoloPaFPN(nn.Module):
+class Yolov4PaFPN(nn.Module):
     def __init__(self, 
                  in_dims=[256, 512, 1024],
                  out_dim=256,
@@ -14,7 +14,7 @@ class YoloPaFPN(nn.Module):
                  act_type='silu',
                  norm_type='BN',
                  depthwise=False):
-        super(YoloPaFPN, self).__init__()
+        super(Yolov4PaFPN, self).__init__()
         self.in_dims = in_dims
         self.out_dim = out_dim
         c3, c4, c5 = in_dims
@@ -123,8 +123,8 @@ class YoloPaFPN(nn.Module):
 def build_fpn(cfg, in_dims, out_dim=None):
     model = cfg['fpn']
     # build neck
-    if model == 'yolo_pafpn':
-        fpn_net = YoloPaFPN(in_dims=in_dims,
+    if model == 'yolov4_pafpn':
+        fpn_net = Yolov4PaFPN(in_dims=in_dims,
                              out_dim=out_dim,
                              width=cfg['width'],
                              depth=cfg['depth'],