瀏覽代碼

add YOLOv7-N & T & H

yjh0410 2 年之前
父節點
當前提交
4dcd885695

+ 15 - 3
config/__init__.py

@@ -41,7 +41,11 @@ def build_model_config(args):
 
 
 # ------------------ Transform Config ----------------------
-from .transform_config import yolov5_strong_trans_config, yolov5_weak_trans_config, yolox_trans_config, ssd_trans_config
+from .transform_config import (
+    yolov5_strong_trans_config, yolov5_weak_trans_config, yolov5_nano_trans_config,
+    yolox_strong_trans_config, yolox_weak_trans_config, yolox_nano_trans_config,
+    ssd_trans_config
+)
 
 def build_trans_config(trans_config='ssd'):
     print('==============================')
@@ -49,13 +53,21 @@ def build_trans_config(trans_config='ssd'):
     # SSD-style transform 
     if trans_config == 'ssd':
         cfg = ssd_trans_config
+
     # YOLOv5-style transform 
     elif trans_config == 'yolov5_strong':
         cfg = yolov5_strong_trans_config
     elif trans_config == 'yolov5_weak':
         cfg = yolov5_weak_trans_config
+    elif trans_config == 'yolov5_nano':
+        cfg = yolov5_nano_trans_config
+        
     # YOLOX-style transform 
-    elif trans_config == 'yolox':
-        cfg = yolox_trans_config
+    elif trans_config == 'yolox_strong':
+        cfg = yolox_strong_trans_config
+    elif trans_config == 'yolox_weak':
+        cfg = yolox_weak_trans_config
+    elif trans_config == 'yolox_nano':
+        cfg = yolox_nano_trans_config
 
     return cfg

+ 61 - 2
config/transform_config.py

@@ -1,6 +1,6 @@
 # transform config
 
-
+# ----------------------- YOLOv5-Style -----------------------
 yolov5_strong_trans_config = {
     'aug_type': 'yolov5',
     # Basic Augment
@@ -39,7 +39,27 @@ yolov5_weak_trans_config = {
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
 }
 
-yolox_trans_config = {
+yolov5_nano_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.1,
+    'scale': 0.5,
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 0.5,
+    'mixup_prob': 0.0,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup',
+    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
+}
+
+# ----------------------- YOLOX-Style -----------------------
+yolox_strong_trans_config = {
     'aug_type': 'yolov5',
     # Basic Augment
     'degrees': 0.0,
@@ -58,6 +78,45 @@ yolox_trans_config = {
     'mixup_scale': [0.5, 1.5]
 }
 
+yolox_weak_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.1,
+    'scale': 0.5,
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 1.0,
+    'mixup_prob': 0.15,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolox_mixup',
+    'mixup_scale': [0.5, 1.5]
+}
+
+yolox_nano_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.1,
+    'scale': 0.5,
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 0.5,
+    'mixup_prob': 0.0,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolox_mixup',
+    'mixup_scale': [0.5, 1.5]
+}
+
+# ----------------------- SSD-Style -----------------------
 ssd_trans_config = {
     'aug_type': 'ssd',
     # Mosaic & Mixup are not used for SSD-style augmentation

+ 123 - 0
config/yolov7_config.py

@@ -1,6 +1,64 @@
 # YOLOv7 Config
 
 yolov7_cfg = {
+    'yolov7_nano':{
+        # input
+        'trans_type': 'yolov5_nano',
+        'multi_scale': [0.5, 1.0],
+        # model
+        'backbone': 'elannet_nano',
+        'pretrained': True,
+        'bk_act': 'silu',
+        'bk_norm': 'BN',
+        'bk_dpw': True,
+        'stride': [8, 16, 32],  # P3, P4, P5
+        # neck
+        'neck': 'sppf',
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
+        'neck_act': 'silu',
+        'neck_norm': 'BN',
+        'neck_depthwise': True,
+        # fpn
+        'fpn': 'yolov7_pafpn',
+        'fpn_act': 'silu',
+        'fpn_norm': 'BN',
+        'fpn_depthwise': True,
+        'nbranch': 2.0,
+        'width': 0.25,
+        'depth': 1.0,
+        # head
+        'head': 'decoupled_head',
+        'head_act': 'silu',
+        'head_norm': 'BN',
+        'num_cls_head': 2,
+        'num_reg_head': 2,
+        'head_depthwise': True,
+        # matcher
+        'matcher': {'center_sampling_radius': 2.5,
+                    'topk_candicate': 10},
+        # loss weight
+        'loss_obj_weight': 1.0,
+        'loss_cls_weight': 1.0,
+        'loss_box_weight': 5.0,
+        # training configuration
+        'no_aug_epoch': 20,
+        # optimizer
+        'optimizer': 'sgd',        # optional: sgd, adam, adamw
+        'momentum': 0.937,         # SGD: 0.937;    AdamW: invalid
+        'weight_decay': 5e-4,      # SGD: 5e-4;     AdamW: 5e-2
+        'clip_grad': 10,           # SGD: 10.0;     AdamW: -1
+        # model EMA
+        'ema_decay': 0.9999,       # SGD: 0.9999;   AdamW: 0.9998
+        'ema_tau': 2000,
+        # lr schedule
+        'scheduler': 'linear',
+        'lr0': 0.01,               # SGD: 0.01;     AdamW: 0.004
+        'lrf': 0.01,               # SGD: 0.01;     AdamW: 0.05
+        'warmup_momentum': 0.8,
+        'warmup_bias_lr': 0.1,
+    },
+
     'yolov7_tiny':{
         # input
         'trans_type': 'yolov5_weak',
@@ -24,6 +82,9 @@ yolov7_cfg = {
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
+        'nbranch': 2.0,
+        'width': 0.5,
+        'depth': 1.0,
         # head
         'head': 'decoupled_head',
         'head_act': 'silu',
@@ -79,6 +140,9 @@ yolov7_cfg = {
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
+        'nbranch': 4.0,
+        'depth': 1.0,
+        'width': 1.0,
         # head
         'head': 'decoupled_head',
         'head_act': 'silu',
@@ -110,4 +174,63 @@ yolov7_cfg = {
         'warmup_momentum': 0.8,
         'warmup_bias_lr': 0.1,
     },
+
+    'yolov7_huge':{
+        # input
+        'trans_type': 'yolov5_strong',
+        'multi_scale': [0.5, 1.0],
+        # model
+        'backbone': 'elannet_huge',
+        'pretrained': True,
+        'bk_act': 'silu',
+        'bk_norm': 'BN',
+        'bk_dpw': False,
+        'stride': [8, 16, 32],  # P3, P4, P5
+        # neck
+        'neck': 'csp_sppf',
+        'expand_ratio': 0.5,
+        'pooling_size': 5,
+        'neck_act': 'silu',
+        'neck_norm': 'BN',
+        'neck_depthwise': False,
+        # fpn
+        'fpn': 'yolov7_pafpn',
+        'fpn_act': 'silu',
+        'fpn_norm': 'BN',
+        'fpn_depthwise': False,
+        'nbranch': 4.0,
+        'depth': 2.0,
+        'width': 1.25,
+        # head
+        'head': 'decoupled_head',
+        'head_act': 'silu',
+        'head_norm': 'BN',
+        'num_cls_head': 2,
+        'num_reg_head': 2,
+        'head_depthwise': False,
+        # matcher
+        'matcher': {'center_sampling_radius': 2.5,
+                    'topk_candicate': 10},
+        # loss weight
+        'loss_obj_weight': 1.0,
+        'loss_cls_weight': 1.0,
+        'loss_box_weight': 5.0,
+        # training configuration
+        'no_aug_epoch': 20,
+        # optimizer
+        'optimizer': 'sgd',        # optional: sgd, adam, adamw
+        'momentum': 0.937,         # SGD: 0.937;    AdamW: invalid
+        'weight_decay': 5e-4,      # SGD: 5e-4;     AdamW: 5e-2
+        'clip_grad': 10,           # SGD: 10.0;     AdamW: -1
+        # model EMA
+        'ema_decay': 0.9999,       # SGD: 0.9999;   AdamW: 0.9998
+        'ema_tau': 2000,
+        # lr schedule
+        'scheduler': 'linear',
+        'lr0': 0.01,               # SGD: 0.01;     AdamW: 0.004
+        'lrf': 0.01,               # SGD: 0.01;     AdamW: 0.05
+        'warmup_momentum': 0.8,
+        'warmup_bias_lr': 0.1,
+    },
+
 }

+ 1 - 1
config/yolox_config.py

@@ -2,7 +2,7 @@
 
 yolox_cfg = {
     # input
-    'trans_type': 'yolox',
+    'trans_type': 'yolox_strong',
     'multi_scale': [0.5, 1.0],
     # model
     'backbone': 'cspdarknet',

+ 1 - 1
models/yolov7/yolov7.py

@@ -38,7 +38,7 @@ class YOLOv7(nn.Module):
         feats_dim[-1] = self.neck.out_dim
 
         ## 颈部网络: 特征金字塔
-        self.fpn = build_fpn(cfg=cfg, in_dims=feats_dim, out_dim=256)
+        self.fpn = build_fpn(cfg=cfg, in_dims=feats_dim, out_dim=round(256*cfg['width']))
         self.head_dim = self.fpn.out_dim
 
         ## 检测头

+ 163 - 15
models/yolov7/yolov7_backbone.py

@@ -8,15 +8,111 @@ except:
     
 
 model_urls = {
+    "elannet_nano": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_nano.pth",
+    "elannet_tiny": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_tiny.pth",
     "elannet_large": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_large.pth",
+    "elannet_huge": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_huge.pth",
 }
 
+
 # --------------------- ELANNet -----------------------
-# ELANNet-Large
-class ELANNet_Lagre(nn.Module):
+# ELANNet-Nano
+class ELANNet_Nano(nn.Module):
+    def __init__(self, act_type='lrelu', norm_type='BN', depthwise=True):
+        super(ELANNet_Nano, self).__init__()
+        self.feat_dims = [64, 128, 256]
+        
+        # P1/2
+        self.layer_1 = Conv(3, 16, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        # P2/4
+        self.layer_2 = nn.Sequential(   
+            Conv(16, 32, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
+            ELANBlock(in_dim=32, out_dim=32, expand_ratio=0.5, depth=1,
+                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+        # P3/8
+        self.layer_3 = nn.Sequential(
+            nn.MaxPool2d((2, 2), 2),             
+            ELANBlock(in_dim=32, out_dim=64, expand_ratio=0.5, depth=1,
+                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+        # P4/16
+        self.layer_4 = nn.Sequential(
+            nn.MaxPool2d((2, 2), 2),             
+            ELANBlock(in_dim=64, out_dim=128, expand_ratio=0.5, depth=1,
+                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+        # P5/32
+        self.layer_5 = nn.Sequential(
+            nn.MaxPool2d((2, 2), 2),             
+            ELANBlock(in_dim=128, out_dim=256, expand_ratio=0.5, depth=1,
+                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+
+
+    def forward(self, x):
+        c1 = self.layer_1(x)
+        c2 = self.layer_2(c1)
+        c3 = self.layer_3(c2)
+        c4 = self.layer_4(c3)
+        c5 = self.layer_5(c4)
+
+        outputs = [c3, c4, c5]
+
+        return outputs
+
+
+# ELANNet-Tiny
+class ELANNet_Tiny(nn.Module):
     """
-    ELAN-Net of YOLOv7-L.
+    ELAN-Net of YOLOv7-Tiny.
     """
+    def __init__(self, act_type='silu', norm_type='BN', depthwise=False):
+        super(ELANNet_Tiny, self).__init__()
+        self.feat_dims = [128, 256, 512]
+        
+        # P1/2
+        self.layer_1 = Conv(3, 32, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        # P2/4
+        self.layer_2 = nn.Sequential(   
+            Conv(32, 64, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
+            ELANBlock(in_dim=64, out_dim=64, expand_ratio=0.5, depth=1,
+                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+        # P3/8
+        self.layer_3 = nn.Sequential(
+            nn.MaxPool2d((2, 2), 2),             
+            ELANBlock(in_dim=64, out_dim=128, expand_ratio=0.5, depth=1,
+                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+        # P4/16
+        self.layer_4 = nn.Sequential(
+            nn.MaxPool2d((2, 2), 2),             
+            ELANBlock(in_dim=128, out_dim=256, expand_ratio=0.5, depth=1,
+                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+        # P5/32
+        self.layer_5 = nn.Sequential(
+            nn.MaxPool2d((2, 2), 2),             
+            ELANBlock(in_dim=256, out_dim=512, expand_ratio=0.5, depth=1,
+                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+
+
+    def forward(self, x):
+        c1 = self.layer_1(x)
+        c2 = self.layer_2(c1)
+        c3 = self.layer_3(c2)
+        c4 = self.layer_4(c3)
+        c5 = self.layer_5(c4)
+
+        outputs = [c3, c4, c5]
+
+        return outputs
+
+
+## ELANNet-Large
+class ELANNet_Lagre(nn.Module):
     def __init__(self, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANNet_Lagre, self).__init__()
         self.feat_dims = [512, 1024, 1024]
@@ -30,25 +126,75 @@ class ELANNet_Lagre(nn.Module):
         # P2/4
         self.layer_2 = nn.Sequential(   
             Conv(64, 128, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
-            ELANBlock(in_dim=128, out_dim=256, expand_ratio=0.5,
+            ELANBlock(in_dim=128, out_dim=256, expand_ratio=0.5, depth=2,
+                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+        # P3/8
+        self.layer_3 = nn.Sequential(
+            DownSample(in_dim=256, out_dim=256, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(in_dim=256, out_dim=512, expand_ratio=0.5, depth=2,
+                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+        # P4/16
+        self.layer_4 = nn.Sequential(
+            DownSample(in_dim=512, out_dim=512, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(in_dim=512, out_dim=1024, expand_ratio=0.5, depth=2,
+                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+        # P5/32
+        self.layer_5 = nn.Sequential(
+            DownSample(in_dim=1024, out_dim=1024, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(in_dim=1024, out_dim=1024, expand_ratio=0.25, depth=2,
+                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+
+
+    def forward(self, x):
+        c1 = self.layer_1(x)
+        c2 = self.layer_2(c1)
+        c3 = self.layer_3(c2)
+        c4 = self.layer_4(c3)
+        c5 = self.layer_5(c4)
+
+        outputs = [c3, c4, c5]
+
+        return outputs
+
+
+## ELANNet-Huge
+class ELANNet_Huge(nn.Module):
+    def __init__(self, act_type='silu', norm_type='BN', depthwise=False):
+        super(ELANNet_Huge, self).__init__()
+        self.feat_dims = [640, 1280, 1280]
+        
+        # P1/2
+        self.layer_1 = nn.Sequential(
+            Conv(3, 40, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            Conv(40, 80, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            Conv(80, 80, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        )
+        # P2/4
+        self.layer_2 = nn.Sequential(   
+            Conv(80, 160, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(in_dim=160, out_dim=320, expand_ratio=0.5, depth=3,
                       act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
         # P3/8
         self.layer_3 = nn.Sequential(
-            DownSample(in_dim=256, act_type=act_type),             
-            ELANBlock(in_dim=256, out_dim=512, expand_ratio=0.5,
+            DownSample(in_dim=320, out_dim=320, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(in_dim=320, out_dim=640, expand_ratio=0.5, depth=3,
                       act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
         # P4/16
         self.layer_4 = nn.Sequential(
-            DownSample(in_dim=512, act_type=act_type),             
-            ELANBlock(in_dim=512, out_dim=1024, expand_ratio=0.5,
+            DownSample(in_dim=640, out_dim=640, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(in_dim=640, out_dim=1280, expand_ratio=0.5, depth=3,
                       act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
         # P5/32
         self.layer_5 = nn.Sequential(
-            DownSample(in_dim=1024, act_type=act_type),             
-            ELANBlock(in_dim=1024, out_dim=1024, expand_ratio=0.25,
+            DownSample(in_dim=1280, out_dim=1280, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(in_dim=1280, out_dim=1280, expand_ratio=0.25, depth=3,
                       act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
 
@@ -71,16 +217,19 @@ def build_backbone(cfg, pretrained=False):
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
+    # build backbone
     if cfg['backbone'] == 'elannet_huge':
-        backbone = None
+        backbone = ELANNet_Huge(cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
     elif cfg['backbone'] == 'elannet_large':
         backbone = ELANNet_Lagre(cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
     elif cfg['backbone'] == 'elannet_tiny':
-        backbone = None
+        backbone = ELANNet_Tiny(cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
     elif cfg['backbone'] == 'elannet_nano':
-        backbone = None
+        backbone = ELANNet_Nano(cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
+    # pyramid feat dims
     feat_dims = backbone.feat_dims
 
+    # load imagenet pretrained weight
     if pretrained:
         url = model_urls[cfg['backbone']]
         if url is not None:
@@ -114,7 +263,7 @@ if __name__ == '__main__':
     from thop import profile
     cfg = {
         'pretrained': False,
-
+        'backbone': 'elannet_huge',
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
@@ -130,7 +279,6 @@ if __name__ == '__main__':
     for out in outputs:
         print(out.shape)
 
-    x = torch.randn(1, 3, 224, 224)
     print('==============================')
     flops, params = profile(model, inputs=(x, ), verbose=False)
     print('==============================')

+ 13 - 36
models/yolov7/yolov7_basic.py

@@ -79,18 +79,18 @@ class Conv(nn.Module):
 
 # ELAN Block
 class ELANBlock(nn.Module):
-    def __init__(self, in_dim, out_dim, expand_ratio=0.5, act_type='silu', norm_type='BN', depthwise=False):
+    def __init__(self, in_dim, out_dim, expand_ratio=0.5, depth=2.0, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANBlock, self).__init__()
         inter_dim = int(in_dim * expand_ratio)
         self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv3 = nn.Sequential(*[
             Conv(inter_dim, inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-            for _ in range(2)
+            for _ in range(round(depth))
         ])
         self.cv4 = nn.Sequential(*[
             Conv(inter_dim, inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-            for _ in range(2)
+            for _ in range(round(depth))
         ])
 
         self.out = Conv(inter_dim*4, out_dim, k=1, act_type=act_type, norm_type=norm_type)
@@ -107,48 +107,25 @@ class ELANBlock(nn.Module):
         return out
 
 
-# DownSample Block
-class DownSample(nn.Module):
-    def __init__(self, in_dim, act_type='silu', norm_type='BN'):
-        super().__init__()
-        inter_dim = in_dim // 2
-        self.mp = nn.MaxPool2d((2, 2), 2)
-        self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
-        self.cv2 = nn.Sequential(
-            Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type),
-            Conv(inter_dim, inter_dim, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type)
-        )
-
-    def forward(self, x):
-        x1 = self.cv1(self.mp(x))
-        x2 = self.cv2(x)
-        out = torch.cat([x1, x2], dim=1)
-
-        return out
-
-
 # ELAN Block for PaFPN
 class ELANBlockFPN(nn.Module):
-    def __init__(self, in_dim, out_dim, act_type='silu', norm_type='BN', depthwise=False):
+    def __init__(self, in_dim, out_dim, expand_ratio=0.5, nbranch=4, depth=1, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANBlockFPN, self).__init__()
         # Basic parameters
-        e1, e2 = 0.5, 0.5
-        width = 4
-        depth = 1
-        inter_dim = int(in_dim * e1)
-        inter_dim2 = int(inter_dim * e2) 
+        inter_dim = int(in_dim * expand_ratio)
+        inter_dim2 = int(inter_dim * expand_ratio) 
         # Network structure
         self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv3 = nn.ModuleList()
-        for idx in range(width):
+        for idx in range(round(nbranch)):
             if idx == 0:
                 cvs = [Conv(inter_dim, inter_dim2, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)]
             else:
                 cvs = [Conv(inter_dim2, inter_dim2, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)]
             # deeper
-            if depth > 1:
-                for _ in range(1, depth):
+            if round(depth) > 1:
+                for _ in range(1, round(depth)):
                     cvs.append(Conv(inter_dim2, inter_dim2, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise))
                 self.cv3.append(nn.Sequential(*cvs))
             else:
@@ -170,11 +147,11 @@ class ELANBlockFPN(nn.Module):
         return out
 
 
-# DownSample Block for PaFPN
-class DownSampleFPN(nn.Module):
-    def __init__(self, in_dim, act_type='silu', norm_type='BN', depthwise=False):
+# DownSample Block
+class DownSample(nn.Module):
+    def __init__(self, in_dim, out_dim, act_type='silu', norm_type='BN', depthwise=False):
         super().__init__()
-        inter_dim = in_dim
+        inter_dim = out_dim // 2
         self.mp = nn.MaxPool2d((2, 2), 2)
         self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv2 = nn.Sequential(

+ 41 - 23
models/yolov7/yolov7_fpn.py

@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from .yolov7_basic import Conv, ELANBlockFPN, DownSampleFPN
+from .yolov7_basic import Conv, ELANBlockFPN, DownSample
 
 
 # PaFPN-ELAN (YOLOv7's)
@@ -9,6 +9,9 @@ class Yolov7PaFPN(nn.Module):
     def __init__(self, 
                  in_dims=[512, 1024, 512],
                  out_dim=None,
+                 width=1.0,
+                 depth=1.0,
+                 nbranch=4.0,
                  act_type='silu',
                  norm_type='BN',
                  depthwise=False):
@@ -18,19 +21,25 @@ class Yolov7PaFPN(nn.Module):
 
         # top dwon
         ## P5 -> P4
-        self.reduce_layer_1 = Conv(c5, 256, k=1, norm_type=norm_type, act_type=act_type)
-        self.reduce_layer_2 = Conv(c4, 256, k=1, norm_type=norm_type, act_type=act_type)
-        self.top_down_layer_1 = ELANBlockFPN(in_dim=256 + 256,
-                                             out_dim=256,
+        self.reduce_layer_1 = Conv(c5, round(256*width), k=1, norm_type=norm_type, act_type=act_type)
+        self.reduce_layer_2 = Conv(c4, round(256*width), k=1, norm_type=norm_type, act_type=act_type)
+        self.top_down_layer_1 = ELANBlockFPN(in_dim=round(256*width) + round(256*width),
+                                             out_dim=round(256*width),
+                                             expand_ratio=0.5,
+                                             nbranch=nbranch,
+                                             depth=depth,
                                              act_type=act_type,
                                              norm_type=norm_type,
                                              depthwise=depthwise
                                              )
         # P4 -> P3
-        self.reduce_layer_3 = Conv(256, 128, k=1, norm_type=norm_type, act_type=act_type)
-        self.reduce_layer_4 = Conv(c3, 128, k=1, norm_type=norm_type, act_type=act_type)
-        self.top_down_layer_2 = ELANBlockFPN(in_dim=128 + 128,
-                                             out_dim=128,
+        self.reduce_layer_3 = Conv(round(256*width), round(128*width), k=1, norm_type=norm_type, act_type=act_type)
+        self.reduce_layer_4 = Conv(c3, round(128*width), k=1, norm_type=norm_type, act_type=act_type)
+        self.top_down_layer_2 = ELANBlockFPN(in_dim=round(128*width) + round(128*width),
+                                             out_dim=round(128*width),
+                                             expand_ratio=0.5,
+                                             nbranch=nbranch,
+                                             depth=depth,
                                              act_type=act_type,
                                              norm_type=norm_type,
                                              depthwise=depthwise
@@ -38,30 +47,36 @@ class Yolov7PaFPN(nn.Module):
 
         # bottom up
         # P3 -> P4
-        self.downsample_layer_1 = DownSampleFPN(128, act_type=act_type,
-                                    norm_type=norm_type, depthwise=depthwise)
-        self.bottom_up_layer_1 = ELANBlockFPN(in_dim=256 + 256,
-                                              out_dim=256,
+        self.downsample_layer_1 = DownSample(in_dim=round(128*width), out_dim=round(256*width),
+                                             act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        self.bottom_up_layer_1 = ELANBlockFPN(in_dim=round(256*width) + round(256*width),
+                                              out_dim=round(256*width),
+                                              expand_ratio=0.5,
+                                              nbranch=nbranch,
+                                              depth=depth,
                                               act_type=act_type,
                                               norm_type=norm_type,
                                               depthwise=depthwise
                                               )
         # P4 -> P5
-        self.downsample_layer_2 = DownSampleFPN(256, act_type=act_type,
-                                    norm_type=norm_type, depthwise=depthwise)
-        self.bottom_up_layer_2 = ELANBlockFPN(in_dim=512 + c5,
-                                              out_dim=512,
+        self.downsample_layer_2 = DownSample(in_dim=round(256*width), out_dim=round(512*width),
+                                             act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        self.bottom_up_layer_2 = ELANBlockFPN(in_dim=round(512*width) + c5,
+                                              out_dim=round(512*width),
+                                              expand_ratio=0.5,
+                                              nbranch=nbranch,
+                                              depth=depth,
                                               act_type=act_type,
                                               norm_type=norm_type,
                                               depthwise=depthwise
                                               )
         
         # head conv
-        self.head_conv_1 = Conv(128, 256, k=3, p=1,
+        self.head_conv_1 = Conv(round(128*width), round(256*width), k=3, p=1,
                                 act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.head_conv_2 = Conv(256, 512, k=3, p=1,
+        self.head_conv_2 = Conv(round(256*width), round(512*width), k=3, p=1,
                                 act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.head_conv_3 = Conv(512, 1024, k=3, p=1,
+        self.head_conv_3 = Conv(round(512*width), round(1024*width), k=3, p=1,
                                 act_type=act_type, norm_type=norm_type, depthwise=depthwise)
 
         # output proj layers
@@ -69,12 +84,12 @@ class Yolov7PaFPN(nn.Module):
             self.out_layers = nn.ModuleList([
                 Conv(in_dim, out_dim, k=1,
                      norm_type=norm_type, act_type=act_type)
-                     for in_dim in [256, 512, 1024]
+                     for in_dim in [round(256*width), round(512*width), round(1024*width)]
                      ])
             self.out_dim = [out_dim] * 3
         else:
             self.out_layers = None
-            self.out_dim = [256, 512, 1024]
+            self.out_dim = [round(256*width), round(512*width), round(1024*width)]
 
 
     def forward(self, features):
@@ -120,10 +135,13 @@ class Yolov7PaFPN(nn.Module):
 
 def build_fpn(cfg, in_dims, out_dim=None):
     model = cfg['fpn']
-    # build neck
+    # build pafpn
     if model == 'yolov7_pafpn':
         fpn_net = Yolov7PaFPN(in_dims=in_dims,
                              out_dim=out_dim,
+                             width=cfg['width'],
+                             depth=cfg['depth'],
+                             nbranch=cfg['nbranch'],
                              act_type=cfg['fpn_act'],
                              norm_type=cfg['fpn_norm'],
                              depthwise=cfg['fpn_depthwise']