
optimize yolov7 codes

yjh0410 2 years ago
parent commit
93b7481820

+ 9 - 9
config/model_config/yolov7_config.py

@@ -23,9 +23,9 @@ yolov7_cfg = {
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
-        'nbranch': 2.0,       # number of branch in ELANBlockFPN
-        'depth': 1.0,         # depth factor of each branch in ELANBlockFPN
-        'width': 0.5,         # width factor of channel in FPN
+        'branch_width': 2,           # number of branches in ELANBlockFPN
+        'branch_depth': 1,           # depth of each branch in ELANBlockFPN
+        'channel_width': 0.5,        # width factor of channels in FPN
         ## Head
         'head': 'decoupled_head',
         'head_act': 'silu',
@@ -72,9 +72,9 @@ yolov7_cfg = {
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
-        'nbranch': 4.0,       # number of branch in ELANBlockFPN
-        'depth': 1.0,         # depth factor of each branch in ELANBlockFPN
-        'width': 1.0,         # width factor of channel in FPN
+        'branch_width': 4,           # number of branches in ELANBlockFPN
+        'branch_depth': 1,           # depth of each branch in ELANBlockFPN
+        'channel_width': 1.0,        # width factor of channels in FPN
         # head
         'head': 'decoupled_head',
         'head_act': 'silu',
@@ -121,9 +121,9 @@ yolov7_cfg = {
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
-        'nbranch': 4.0,        # number of branch in ELANBlockFPN
-        'depth': 2.0,          # depth factor of each branch in ELANBlockFPN
-        'width': 1.25,         # width factor of channel in FPN
+        'branch_width': 4,             # number of branches in ELANBlockFPN
+        'branch_depth': 2,             # depth of each branch in ELANBlockFPN
+        'channel_width': 1.25,         # width factor of channels in FPN
         ## Head
         'head': 'decoupled_head',
         'head_act': 'silu',
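
The rename makes the three FPN scaling factors self-describing: branch_width counts the parallel branches in ELANBlockFPN, branch_depth sets how many 3x3 convs each branch stacks, and channel_width scales the FPN channel counts. A minimal sketch of how a consumer could read the renamed keys (key names and values follow this commit; the scaled_channels helper is hypothetical):

yolov7_tiny_fpn_cfg = {
    'branch_width': 2,     # number of branches in ELANBlockFPN
    'branch_depth': 1,     # depth of each branch in ELANBlockFPN
    'channel_width': 0.5,  # width factor of channels in FPN
}

def scaled_channels(base: int, cfg: dict) -> int:
    # Same arithmetic the FPN uses below: round(base * channel_width).
    return round(base * cfg['channel_width'])

print(scaled_channels(256, yolov7_tiny_fpn_cfg))  # -> 128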

+ 2 - 0
eval.py

@@ -52,6 +52,8 @@ def parse_args():
                         help='mosaic augmentation.')
     parser.add_argument('--mixup', default=None, type=float,
                         help='mixup augmentation.')
+    parser.add_argument('--load_cache', action='store_true', default=False,
+                        help='load data into memory.')
 
     # TTA
     parser.add_argument('-tta', '--test_aug', action='store_true', default=False,
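
The same --load_cache flag is added to test.py at the end of this commit. A self-contained reproduction of how argparse handles it (the dataset-side caching behavior is an assumption, not part of this diff):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--load_cache', action='store_true', default=False,
                    help='load data into memory.')

# store_true flags stay False unless passed on the command line.
print(parser.parse_args([]).load_cache)                # -> False
print(parser.parse_args(['--load_cache']).load_cache)  # -> True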

+ 13 - 3
models/detectors/yolov7/yolov7.py

@@ -40,7 +40,7 @@ class YOLOv7(nn.Module):
         feats_dim[-1] = self.neck.out_dim
 
 ## Neck network: feature pyramid
-        self.fpn = build_fpn(cfg=cfg, in_dims=feats_dim, out_dim=round(256*cfg['width']))
+        self.fpn = build_fpn(cfg=cfg, in_dims=feats_dim, out_dim=round(256*cfg['channel_width']))
         self.head_dim = self.fpn.out_dim
 
 ## Detection head
@@ -216,9 +216,11 @@ class YOLOv7(nn.Module):
 
             # Detection head
             all_anchors = []
+            all_strides = []
             all_obj_preds = []
             all_cls_preds = []
             all_box_preds = []
+            all_reg_preds = []
             for level, (feat, head) in enumerate(zip(pyramid_feats, self.non_shared_heads)):
                 cls_feat, reg_feat = head(feat)
 
@@ -232,6 +234,9 @@ class YOLOv7(nn.Module):
                 # generate anchor boxes: [M, 4]
                 anchors = self.generate_anchors(level, fmp_size)
                 
+                # stride tensor: [M, 1]
+                stride_tensor = torch.ones_like(anchors[..., :1]) * self.stride[level]
+
                 # [B, C, H, W] -> [B, H, W, C] -> [B, M, C]
                 obj_pred = obj_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 1)
                 cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_classes)
@@ -247,13 +252,18 @@ class YOLOv7(nn.Module):
                 all_obj_preds.append(obj_pred)
                 all_cls_preds.append(cls_pred)
                 all_box_preds.append(box_pred)
+                all_reg_preds.append(reg_pred)
                 all_anchors.append(anchors)
+                all_strides.append(stride_tensor)
             
             # output dict
             outputs = {"pred_obj": all_obj_preds,        # List(Tensor) [B, M, 1]
                        "pred_cls": all_cls_preds,        # List(Tensor) [B, M, C]
                        "pred_box": all_box_preds,        # List(Tensor) [B, M, 4]
-                       "anchors": all_anchors,           # List(Tensor) [B, M, 2]
-                       'strides': self.stride}           # List(Int) [8, 16, 32]
+                       "pred_reg": all_reg_preds,        # List(Tensor) [B, M, 4]
+                       "anchors": all_anchors,           # List(Tensor) [M, 2]
+                       "strides": self.stride,           # List(Int) [8, 16, 32]
+                       "stride_tensors": all_strides     # List(Tensor) [M, 1]
+                       }
 
             return outputs 
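
The new "stride_tensors" entry gives every anchor its own stride, so downstream code can decode the raw "pred_reg" outputs without re-indexing self.stride by level (an assumption about how the loss consumes these keys). A minimal sketch of the per-level construction for one 20x20 feature map, with illustrative sizes:

import torch

fmp_h, fmp_w, stride = 20, 20, 16
anchors = torch.zeros(fmp_h * fmp_w, 2)   # [M, 2], M = 400 anchor points

# Same expression as the commit: a [M, 1] tensor filled with this level's stride.
stride_tensor = torch.ones_like(anchors[..., :1]) * stride
print(stride_tensor.shape)  # torch.Size([400, 1])
print(stride_tensor[0])     # tensor([16.])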

+ 64 - 120
models/detectors/yolov7/yolov7_backbone.py

@@ -8,7 +8,6 @@ except:
     
 
 model_urls = {
-    "elannet_nano": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_nano.pth",
     "elannet_tiny": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_tiny.pth",
     "elannet_large": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_large.pth",
     "elannet_huge": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_huge.pth",
@@ -16,86 +15,40 @@ model_urls = {
 
 
 # --------------------- ELANNet -----------------------
-# ELANNet-Nano
-class ELANNet_Nano(nn.Module):
-    def __init__(self, act_type='lrelu', norm_type='BN', depthwise=True):
-        super(ELANNet_Nano, self).__init__()
-        self.feat_dims = [64, 128, 256]
-        
-        # P1/2
-        self.layer_1 = Conv(3, 16, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        # P2/4
-        self.layer_2 = nn.Sequential(   
-            Conv(16, 32, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
-            ELANBlock(in_dim=32, out_dim=32, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        )
-        # P3/8
-        self.layer_3 = nn.Sequential(
-            nn.MaxPool2d((2, 2), 2),             
-            ELANBlock(in_dim=32, out_dim=64, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        )
-        # P4/16
-        self.layer_4 = nn.Sequential(
-            nn.MaxPool2d((2, 2), 2),             
-            ELANBlock(in_dim=64, out_dim=128, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        )
-        # P5/32
-        self.layer_5 = nn.Sequential(
-            nn.MaxPool2d((2, 2), 2),             
-            ELANBlock(in_dim=128, out_dim=256, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        )
-
-
-    def forward(self, x):
-        c1 = self.layer_1(x)
-        c2 = self.layer_2(c1)
-        c3 = self.layer_3(c2)
-        c4 = self.layer_4(c3)
-        c5 = self.layer_5(c4)
-
-        outputs = [c3, c4, c5]
-
-        return outputs
-
-
-# ELANNet-Tiny
+## ELANNet-Tiny
 class ELANNet_Tiny(nn.Module):
     """
     ELAN-Net of YOLOv7-Tiny.
     """
     def __init__(self, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANNet_Tiny, self).__init__()
-        self.feat_dims = [128, 256, 512]
+        # -------------- Basic parameters --------------
+        self.feat_dims = [32, 64, 128, 256, 512]
+        self.squeeze_ratios = [0.5, 0.5, 0.5, 0.5]  # Stage-1 -> Stage-4
+        self.branch_depths = [1, 1, 1, 1]            # Stage-1 -> Stage-4
         
-        # P1/2
-        self.layer_1 = Conv(3, 32, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        # P2/4
+        # -------------- Network parameters --------------
+        ## P1/2
+        self.layer_1 = Conv(3, self.feat_dims[0], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        ## P2/4: Stage-1
         self.layer_2 = nn.Sequential(   
-            Conv(32, 64, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
-            ELANBlock(in_dim=64, out_dim=64, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            Conv(self.feat_dims[0], self.feat_dims[1], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
+            ELANBlock(self.feat_dims[1], self.feat_dims[1], self.squeeze_ratios[0], self.branch_depths[0], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P3/8
+        ## P3/8: Stage-2
         self.layer_3 = nn.Sequential(
             nn.MaxPool2d((2, 2), 2),             
-            ELANBlock(in_dim=64, out_dim=128, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            ELANBlock(self.feat_dims[1], self.feat_dims[2], self.squeeze_ratios[1], self.branch_depths[1], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P4/16
+        ## P4/16: Stage-3
         self.layer_4 = nn.Sequential(
             nn.MaxPool2d((2, 2), 2),             
-            ELANBlock(in_dim=128, out_dim=256, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            ELANBlock(self.feat_dims[2], self.feat_dims[3], self.squeeze_ratios[2], self.branch_depths[2], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P5/32
+        ## P5/32: Stage-4
         self.layer_5 = nn.Sequential(
             nn.MaxPool2d((2, 2), 2),             
-            ELANBlock(in_dim=256, out_dim=512, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            ELANBlock(self.feat_dims[3], self.feat_dims[4], self.squeeze_ratios[3], self.branch_depths[3], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
 
 
@@ -110,42 +63,41 @@ class ELANNet_Tiny(nn.Module):
 
         return outputs
 
-
 ## ELANNet-Large
 class ELANNet_Lagre(nn.Module):
     def __init__(self, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANNet_Lagre, self).__init__()
-        self.feat_dims = [512, 1024, 1024]
-        
-        # P1/2
+        # -------------------- Basic parameters --------------------
+        self.feat_dims = [32, 64, 128, 256, 512, 1024, 1024]
+        self.squeeze_ratios = [0.5, 0.5, 0.5, 0.25] # Stage-1 -> Stage-4
+        self.branch_depths = [2, 2, 2, 2]            # Stage-1 -> Stage-4
+
+        # -------------------- Network parameters --------------------
+        ## P1/2
         self.layer_1 = nn.Sequential(
-            Conv(3, 32, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise),      
-            Conv(32, 64, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            Conv(64, 64, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            Conv(3, self.feat_dims[0], k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise),      
+            Conv(self.feat_dims[0], self.feat_dims[1], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            Conv(self.feat_dims[1], self.feat_dims[1], k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P2/4
+        ## P2/4: Stage-1
         self.layer_2 = nn.Sequential(   
-            Conv(64, 128, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
-            ELANBlock(in_dim=128, out_dim=256, expand_ratio=0.5, depth=2,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            Conv(self.feat_dims[1], self.feat_dims[2], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
+            ELANBlock(self.feat_dims[2], self.feat_dims[3], self.squeeze_ratios[0], self.branch_depths[0], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P3/8
+        ## P3/8: Stage-2
         self.layer_3 = nn.Sequential(
-            DownSample(in_dim=256, out_dim=256, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=256, out_dim=512, expand_ratio=0.5, depth=2,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            DownSample(self.feat_dims[3], self.feat_dims[3], act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(self.feat_dims[3], self.feat_dims[4], self.squeeze_ratios[1], self.branch_depths[1], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P4/16
+        ## P4/16: Stage-3
         self.layer_4 = nn.Sequential(
-            DownSample(in_dim=512, out_dim=512, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=512, out_dim=1024, expand_ratio=0.5, depth=2,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            DownSample(self.feat_dims[4], self.feat_dims[4], act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(self.feat_dims[4], self.feat_dims[5], self.squeeze_ratios[2], self.branch_depths[2], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P5/32
+        ## P5/32: Stage-4
         self.layer_5 = nn.Sequential(
-            DownSample(in_dim=1024, out_dim=1024, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=1024, out_dim=1024, expand_ratio=0.25, depth=2,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            DownSample(self.feat_dims[5], self.feat_dims[5], act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(self.feat_dims[5], self.feat_dims[6], self.squeeze_ratios[3], self.branch_depths[3], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
 
 
@@ -160,42 +112,41 @@ class ELANNet_Lagre(nn.Module):
 
         return outputs
 
-
 ## ELANNet-Huge
 class ELANNet_Huge(nn.Module):
     def __init__(self, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANNet_Huge, self).__init__()
-        self.feat_dims = [640, 1280, 1280]
-        
-        # P1/2
+        # -------------------- Basic parameters --------------------
+        self.feat_dims = [40, 80, 160, 320, 640, 1280, 1280]
+        self.squeeze_ratios = [0.5, 0.5, 0.5, 0.25] # Stage-1 -> Stage-4
+        self.branch_depths = [3, 3, 3, 3]            # Stage-1 -> Stage-4
+
+        # -------------------- Network parameters --------------------
+        ## P1/2
         self.layer_1 = nn.Sequential(
-            Conv(3, 40, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            Conv(40, 80, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            Conv(80, 80, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            Conv(3, self.feat_dims[0], k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise),      
+            Conv(self.feat_dims[0], self.feat_dims[1], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            Conv(self.feat_dims[1], self.feat_dims[1], k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P2/4
+        ## P2/4: Stage-1
         self.layer_2 = nn.Sequential(   
-            Conv(80, 160, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=160, out_dim=320, expand_ratio=0.5, depth=3,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            Conv(self.feat_dims[1], self.feat_dims[2], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
+            ELANBlock(self.feat_dims[2], self.feat_dims[3], self.squeeze_ratios[0], self.branch_depths[0], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P3/8
+        ## P3/8: Stage-2
         self.layer_3 = nn.Sequential(
-            DownSample(in_dim=320, out_dim=320, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=320, out_dim=640, expand_ratio=0.5, depth=3,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            DownSample(self.feat_dims[3], self.feat_dims[3], act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(self.feat_dims[3], self.feat_dims[4], self.squeeze_ratios[1], self.branch_depths[1], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P4/16
+        ## P4/16: Stage-3
         self.layer_4 = nn.Sequential(
-            DownSample(in_dim=640, out_dim=640, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=640, out_dim=1280, expand_ratio=0.5, depth=3,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            DownSample(self.feat_dims[4], self.feat_dims[4], act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(self.feat_dims[4], self.feat_dims[5], self.squeeze_ratios[2], self.branch_depths[2], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P5/32
+        ## P5/32: Stage-4
         self.layer_5 = nn.Sequential(
-            DownSample(in_dim=1280, out_dim=1280, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=1280, out_dim=1280, expand_ratio=0.25, depth=3,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            DownSample(self.feat_dims[5], self.feat_dims[5], act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(self.feat_dims[5], self.feat_dims[6], self.squeeze_ratios[3], self.branch_depths[3], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
 
 
@@ -212,11 +163,8 @@ class ELANNet_Huge(nn.Module):
 
 
 # --------------------- Functions -----------------------
+## build backbone
 def build_backbone(cfg, pretrained=False): 
-    """Constructs a ELANNet model.
-    Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
-    """
     # build backbone
     if cfg['backbone'] == 'elannet_huge':
         backbone = ELANNet_Huge(cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
@@ -224,10 +172,8 @@ def build_backbone(cfg, pretrained=False):
         backbone = ELANNet_Lagre(cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
     elif cfg['backbone'] == 'elannet_tiny':
         backbone = ELANNet_Tiny(cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
-    elif cfg['backbone'] == 'elannet_nano':
-        backbone = ELANNet_Nano(cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
     # pyramid feat dims
-    feat_dims = backbone.feat_dims
+    feat_dims = backbone.feat_dims[-3:]
 
     # load imagenet pretrained weight
     if pretrained:
@@ -262,13 +208,11 @@ if __name__ == '__main__':
     import time
     from thop import profile
     cfg = {
-        'pretrained': False,
+        'pretrained': True,
         'backbone': 'elannet_huge',
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
-        'p6_feat': False,
-        'p7_feat': False,
     }
     model, feats = build_backbone(cfg)
     x = torch.randn(1, 3, 224, 224)
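
With feat_dims now listing every stage width, build_backbone slices off the last three entries for the pyramid levels the FPN consumes. A quick check of that slice for ELANNet-Large:

# Per this commit, ELANNet-Large records all stage widths...
feat_dims = [32, 64, 128, 256, 512, 1024, 1024]

# ...and build_backbone keeps only C3/C4/C5 for the FPN.
fpn_in_dims = feat_dims[-3:]
print(fpn_in_dims)  # [512, 1024, 1024]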

+ 10 - 16
models/detectors/yolov7/yolov7_basic.py

@@ -11,13 +11,11 @@ class SiLU(nn.Module):
     def forward(x):
         return x * torch.sigmoid(x)
 
-
 def get_conv2d(c1, c2, k, p, s, d, g, bias=False):
     conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias)
 
     return conv
 
-
 def get_activation(act_type=None):
     if act_type == 'relu':
         return nn.ReLU(inplace=True)
@@ -28,14 +26,12 @@ def get_activation(act_type=None):
     elif act_type == 'silu':
         return nn.SiLU(inplace=True)
 
-
 def get_norm(norm_type, dim):
     if norm_type == 'BN':
         return nn.BatchNorm2d(dim)
     elif norm_type == 'GN':
         return nn.GroupNorm(num_groups=32, num_channels=dim)
 
-
 ## Basic conv layer
 class Conv(nn.Module):
     def __init__(self, 
@@ -82,18 +78,18 @@ class Conv(nn.Module):
 # ---------------------------- YOLOv7 Modules ----------------------------
 ## ELAN-Block proposed by YOLOv7
 class ELANBlock(nn.Module):
-    def __init__(self, in_dim, out_dim, expand_ratio=0.5, depth=2.0, act_type='silu', norm_type='BN', depthwise=False):
+    def __init__(self, in_dim, out_dim, squeeze_ratio=0.5, branch_depth: int = 2, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANBlock, self).__init__()
-        inter_dim = int(in_dim * expand_ratio)
+        inter_dim = int(in_dim * squeeze_ratio)
         self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv3 = nn.Sequential(*[
             Conv(inter_dim, inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-            for _ in range(round(depth))
+            for _ in range(round(branch_depth))
         ])
         self.cv4 = nn.Sequential(*[
             Conv(inter_dim, inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-            for _ in range(round(depth))
+            for _ in range(round(branch_depth))
         ])
 
         self.out = Conv(inter_dim*4, out_dim, k=1, act_type=act_type, norm_type=norm_type)
@@ -109,26 +105,25 @@ class ELANBlock(nn.Module):
 
         return out
 
-
 ## PaFPN's ELAN-Block proposed by YOLOv7
 class ELANBlockFPN(nn.Module):
-    def __init__(self, in_dim, out_dim, expand_ratio=0.5, nbranch=4, depth=1, act_type='silu', norm_type='BN', depthwise=False):
+    def __init__(self, in_dim, out_dim, squeeze_ratio=0.5, branch_width: int = 4, branch_depth: int = 1, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANBlockFPN, self).__init__()
         # Basic parameters
-        inter_dim = int(in_dim * expand_ratio)
-        inter_dim2 = int(inter_dim * expand_ratio) 
+        inter_dim = int(in_dim * squeeze_ratio)
+        inter_dim2 = int(inter_dim * squeeze_ratio) 
         # Network structure
         self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv3 = nn.ModuleList()
-        for idx in range(round(nbranch)):
+        for idx in range(round(branch_width)):
             if idx == 0:
                 cvs = [Conv(inter_dim, inter_dim2, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)]
             else:
                 cvs = [Conv(inter_dim2, inter_dim2, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)]
             # deeper
-            if round(depth) > 1:
-                for _ in range(1, round(depth)):
+            if round(branch_depth) > 1:
+                for _ in range(1, round(branch_depth)):
                     cvs.append(Conv(inter_dim2, inter_dim2, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise))
                 self.cv3.append(nn.Sequential(*cvs))
             else:
@@ -149,7 +144,6 @@ class ELANBlockFPN(nn.Module):
 
         return out
 
-
 ## DownSample Block proposed by YOLOv7
 class DownSample(nn.Module):
     def __init__(self, in_dim, out_dim, act_type='silu', norm_type='BN', depthwise=False):
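
Under the renamed parameters, ELANBlock's channel bookkeeping is unchanged: two 1x1 squeezes to inter_dim, two serial branches of branch_depth 3x3 convs each, then a 1x1 fuse over the four concatenated inter_dim tensors. A sketch of the arithmetic with illustrative values:

in_dim, squeeze_ratio, branch_depth = 256, 0.5, 2

inter_dim = int(in_dim * squeeze_ratio)  # 128: width of cv1/cv2 and both branches
concat_dim = inter_dim * 4               # 512: cv1 + cv2 + cv3 + cv4 outputs, fed to self.out
num_branch_convs = 2 * branch_depth      # 4: total 3x3 convs across the two branches

print(inter_dim, concat_dim, num_branch_convs)  # 128 512 4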

+ 60 - 58
models/detectors/yolov7/yolov7_pafpn.py

@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from .yolov7_basic import Conv, ELANBlockFPN, DownSample, RepConv
+from .yolov7_basic import Conv, ELANBlockFPN, DownSample
 
 
 # PaFPN-ELAN (YOLOv7's)
@@ -9,81 +9,83 @@ class Yolov7PaFPN(nn.Module):
     def __init__(self, 
                  in_dims=[512, 1024, 512],
                  out_dim=None,
-                 width=1.0,
-                 depth=1.0,
-                 nbranch=4.0,
+                 channel_width : float = 1.0,
+                 branch_width  : int   = 4,
+                 branch_depth  : int   = 1,
                  act_type='silu',
                  norm_type='BN',
                  depthwise=False):
         super(Yolov7PaFPN, self).__init__()
-        self.in_dims = in_dims
-        c3, c4, c5 = in_dims
+        # ----------------------------- Basic parameters -----------------------------
+        self.fpn_dims = in_dims
+        self.channel_width = channel_width
+        self.branch_width = branch_width
+        self.branch_depth = branch_depth
+        c3, c4, c5 = self.fpn_dims
 
-        # top dwon
+        # ----------------------------- Top-down FPN -----------------------------
         ## P5 -> P4
-        self.reduce_layer_1 = Conv(c5, round(256*width), k=1, norm_type=norm_type, act_type=act_type)
-        self.reduce_layer_2 = Conv(c4, round(256*width), k=1, norm_type=norm_type, act_type=act_type)
-        self.top_down_layer_1 = ELANBlockFPN(in_dim=round(256*width) + round(256*width),
-                                             out_dim=round(256*width),
-                                             expand_ratio=0.5,
-                                             nbranch=nbranch,
-                                             depth=depth,
+        self.reduce_layer_1 = Conv(c5, round(256*channel_width), k=1, norm_type=norm_type, act_type=act_type)
+        self.reduce_layer_2 = Conv(c4, round(256*channel_width), k=1, norm_type=norm_type, act_type=act_type)
+        self.top_down_layer_1 = ELANBlockFPN(in_dim=round(256*channel_width) + round(256*channel_width),
+                                             out_dim=round(256*channel_width),
+                                             squeeze_ratio=0.5,
+                                             branch_width=branch_width,
+                                             branch_depth=branch_depth,
                                              act_type=act_type,
                                              norm_type=norm_type,
                                              depthwise=depthwise
                                              )
-        # P4 -> P3
-        self.reduce_layer_3 = Conv(round(256*width), round(128*width), k=1, norm_type=norm_type, act_type=act_type)
-        self.reduce_layer_4 = Conv(c3, round(128*width), k=1, norm_type=norm_type, act_type=act_type)
-        self.top_down_layer_2 = ELANBlockFPN(in_dim=round(128*width) + round(128*width),
-                                             out_dim=round(128*width),
-                                             expand_ratio=0.5,
-                                             nbranch=nbranch,
-                                             depth=depth,
+        ## P4 -> P3
+        self.reduce_layer_3 = Conv(round(256*channel_width), round(128*channel_width), k=1, norm_type=norm_type, act_type=act_type)
+        self.reduce_layer_4 = Conv(c3, round(128*channel_width), k=1, norm_type=norm_type, act_type=act_type)
+        self.top_down_layer_2 = ELANBlockFPN(in_dim=round(128*channel_width) + round(128*channel_width),
+                                             out_dim=round(128*channel_width),
+                                             squeeze_ratio=0.5,
+                                             branch_width=branch_width,
+                                             branch_depth=branch_depth,
                                              act_type=act_type,
                                              norm_type=norm_type,
                                              depthwise=depthwise
                                              )
-
-        # bottom up
-        # P3 -> P4
-        self.downsample_layer_1 = DownSample(in_dim=round(128*width), out_dim=round(256*width),
-                                             act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.bottom_up_layer_1 = ELANBlockFPN(in_dim=round(256*width) + round(256*width),
-                                              out_dim=round(256*width),
-                                              expand_ratio=0.5,
-                                              nbranch=nbranch,
-                                              depth=depth,
+        # ----------------------------- Bottom-up FPN -----------------------------
+        ## P3 -> P4
+        self.downsample_layer_1 = DownSample(round(128*channel_width), round(256*channel_width), act_type, norm_type, depthwise)
+        self.bottom_up_layer_1 = ELANBlockFPN(in_dim=round(256*channel_width) + round(256*channel_width),
+                                              out_dim=round(256*channel_width),
+                                              squeeze_ratio=0.5,
+                                              branch_width=branch_width,
+                                              branch_depth=branch_depth,
                                               act_type=act_type,
                                               norm_type=norm_type,
                                               depthwise=depthwise
                                               )
-        # P4 -> P5
-        self.downsample_layer_2 = DownSample(in_dim=round(256*width), out_dim=round(512*width),
-                                             act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.bottom_up_layer_2 = ELANBlockFPN(in_dim=round(512*width) + c5,
-                                              out_dim=round(512*width),
-                                              expand_ratio=0.5,
-                                              nbranch=nbranch,
-                                              depth=depth,
+        ## P4 -> P5
+        self.downsample_layer_2 = DownSample(round(256*channel_width), round(512*channel_width), act_type, norm_type, depthwise)
+        self.bottom_up_layer_2 = ELANBlockFPN(in_dim=round(512*channel_width) + c5,
+                                              out_dim=round(512*channel_width),
+                                              squeeze_ratio=0.5,
+                                              branch_width=branch_width,
+                                              branch_depth=branch_depth,
                                               act_type=act_type,
                                               norm_type=norm_type,
                                               depthwise=depthwise
                                               )
-        self.head_conv_1 = Conv(round(128*width), round(256*width), k=3, s=1, p=1, act_type=act_type, norm_type=norm_type)
-        self.head_conv_2 = Conv(round(256*width), round(512*width), k=3, s=1, p=1, act_type=act_type, norm_type=norm_type)
-        self.head_conv_3 = Conv(round(512*width), round(1024*width), k=3, s=1, p=1, act_type=act_type, norm_type=norm_type)
-        # output proj layers
+        # ----------------------------- Output Proj -----------------------------
+        ## Head convs
+        self.head_conv_1 = Conv(round(128*channel_width), round(256*channel_width), k=3, s=1, p=1, act_type=act_type, norm_type=norm_type)
+        self.head_conv_2 = Conv(round(256*channel_width), round(512*channel_width), k=3, s=1, p=1, act_type=act_type, norm_type=norm_type)
+        self.head_conv_3 = Conv(round(512*channel_width), round(1024*channel_width), k=3, s=1, p=1, act_type=act_type, norm_type=norm_type)
+        ## Output projs
         if out_dim is not None:
             self.out_layers = nn.ModuleList([
-                Conv(in_dim, out_dim, k=1,
-                     norm_type=norm_type, act_type=act_type)
-                     for in_dim in [round(256*width), round(512*width), round(1024*width)]
-                     ])
+                Conv(in_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type)
+                for in_dim in [round(256*channel_width), round(512*channel_width), round(1024*channel_width)]
+                ])
             self.out_dim = [out_dim] * 3
         else:
             self.out_layers = None
-            self.out_dim = [round(256*width), round(512*width), round(1024*width)]
+            self.out_dim = [round(256*channel_width), round(512*channel_width), round(1024*channel_width)]
 
 
     def forward(self, features):
@@ -130,15 +132,15 @@ def build_fpn(cfg, in_dims, out_dim=None):
     model = cfg['fpn']
     # build pafpn
     if model == 'yolov7_pafpn':
-        fpn_net = Yolov7PaFPN(in_dims=in_dims,
-                             out_dim=out_dim,
-                             width=cfg['width'],
-                             depth=cfg['depth'],
-                             nbranch=cfg['nbranch'],
-                             act_type=cfg['fpn_act'],
-                             norm_type=cfg['fpn_norm'],
-                             depthwise=cfg['fpn_depthwise']
-                             )
+        fpn_net = Yolov7PaFPN(in_dims       = in_dims,
+                              out_dim       = out_dim,
+                              channel_width = cfg['channel_width'],
+                              branch_width  = cfg['branch_width'],
+                              branch_depth  = cfg['branch_depth'],
+                              act_type      = cfg['fpn_act'],
+                              norm_type     = cfg['fpn_norm'],
+                              depthwise     = cfg['fpn_depthwise']
+                              )
 
 
     return fpn_net
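
Callers must now supply the renamed keys; a config still carrying the old 'width' / 'depth' / 'nbranch' entries would raise a KeyError here. A usage sketch, assuming the repository root is on PYTHONPATH so the package imports resolve:

from models.detectors.yolov7.yolov7_pafpn import build_fpn

cfg = {
    'fpn': 'yolov7_pafpn',
    'channel_width': 1.0,
    'branch_width': 4,
    'branch_depth': 1,
    'fpn_act': 'silu',
    'fpn_norm': 'BN',
    'fpn_depthwise': False,
}

# Mirrors the call site in yolov7.py: out_dim = round(256 * channel_width).
fpn = build_fpn(cfg, in_dims=[512, 1024, 1024], out_dim=round(256 * cfg['channel_width']))
print(fpn.out_dim)  # [256, 256, 256] when an explicit out_dim is given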

+ 2 - 0
test.py

@@ -65,6 +65,8 @@ def parse_args():
                         help='mosaic augmentation.')
     parser.add_argument('--mixup', default=None, type=float,
                         help='mixup augmentation.')
+    parser.add_argument('--load_cache', action='store_true', default=False,
+                        help='load data into memory.')
 
     return parser.parse_args()