
optimize yolov7 codes

yjh0410 2 years ago
parent commit
93b7481820

+ 9 - 9
config/model_config/yolov7_config.py

@@ -23,9 +23,9 @@ yolov7_cfg = {
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
-        'nbranch': 2.0,       # number of branch in ELANBlockFPN
-        'depth': 1.0,         # depth factor of each branch in ELANBlockFPN
-        'width': 0.5,         # width factor of channel in FPN
+        'branch_width': 2,           # number of branches in ELANBlockFPN
+        'branch_depth': 1,           # depth of each branch in ELANBlockFPN
+        'channel_width': 0.5,        # width factor of channels in FPN
         ## Head
         'head': 'decoupled_head',
         'head_act': 'silu',
@@ -72,9 +72,9 @@ yolov7_cfg = {
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
-        'nbranch': 4.0,       # number of branch in ELANBlockFPN
-        'depth': 1.0,         # depth factor of each branch in ELANBlockFPN
-        'width': 1.0,         # width factor of channel in FPN
+        'branch_width': 4,           # number of branches in ELANBlockFPN
+        'branch_depth': 1,           # depth of each branch in ELANBlockFPN
+        'channel_width': 1.0,        # width factor of channels in FPN
         # head
         'head': 'decoupled_head',
         'head_act': 'silu',
@@ -121,9 +121,9 @@ yolov7_cfg = {
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
-        'nbranch': 4.0,        # number of branch in ELANBlockFPN
-        'depth': 2.0,          # depth factor of each branch in ELANBlockFPN
-        'width': 1.25,         # width factor of channel in FPN
+        'branch_width': 4,             # number of branches in ELANBlockFPN
+        'branch_depth': 2,             # depth of each branch in ELANBlockFPN
+        'channel_width': 1.25,         # width factor of channels in FPN
         ## Head
         'head': 'decoupled_head',
         'head_act': 'silu',
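
The rename makes the three FPN scaling factors self-describing: branch_width counts the parallel branches in ELANBlockFPN, branch_depth sets how many 3x3 convs each branch stacks, and channel_width scales the FPN channel counts. A minimal sketch of how a consumer could read the renamed keys (key names and values follow this commit; the scaled_channels helper is hypothetical):

yolov7_tiny_fpn_cfg = {
    'branch_width': 2,     # number of branches in ELANBlockFPN
    'branch_depth': 1,     # depth of each branch in ELANBlockFPN
    'channel_width': 0.5,  # width factor of channels in FPN
}

def scaled_channels(base: int, cfg: dict) -> int:
    # Same arithmetic the FPN uses below: round(base * channel_width).
    return round(base * cfg['channel_width'])

print(scaled_channels(256, yolov7_tiny_fpn_cfg))  # -> 128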

+ 2 - 0
eval.py

@@ -52,6 +52,8 @@ def parse_args():
                         help='mosaic augmentation.')
     parser.add_argument('--mixup', default=None, type=float,
                         help='mixup augmentation.')
+    parser.add_argument('--load_cache', action='store_true', default=False,
+                        help='load data into memory.')
 
     # TTA
     parser.add_argument('-tta', '--test_aug', action='store_true', default=False,
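
The same --load_cache flag is added to test.py at the end of this commit. A self-contained reproduction of how argparse handles it (the dataset-side caching behavior is an assumption, not part of this diff):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--load_cache', action='store_true', default=False,
                    help='load data into memory.')

# store_true flags stay False unless passed on the command line.
print(parser.parse_args([]).load_cache)                # -> False
print(parser.parse_args(['--load_cache']).load_cache)  # -> True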

+ 13 - 3
models/detectors/yolov7/yolov7.py

@@ -40,7 +40,7 @@ class YOLOv7(nn.Module):
         feats_dim[-1] = self.neck.out_dim
 
 ## Neck network: feature pyramid
-        self.fpn = build_fpn(cfg=cfg, in_dims=feats_dim, out_dim=round(256*cfg['width']))
+        self.fpn = build_fpn(cfg=cfg, in_dims=feats_dim, out_dim=round(256*cfg['channel_width']))
         self.head_dim = self.fpn.out_dim
 
 ## Detection head
@@ -216,9 +216,11 @@ class YOLOv7(nn.Module):
 
             # Detection head
             all_anchors = []
+            all_strides = []
             all_obj_preds = []
             all_cls_preds = []
             all_box_preds = []
+            all_reg_preds = []
             for level, (feat, head) in enumerate(zip(pyramid_feats, self.non_shared_heads)):
                 cls_feat, reg_feat = head(feat)
 
@@ -232,6 +234,9 @@ class YOLOv7(nn.Module):
                 # generate anchor boxes: [M, 4]
                 anchors = self.generate_anchors(level, fmp_size)
                 
+                # stride tensor: [M, 1]
+                stride_tensor = torch.ones_like(anchors[..., :1]) * self.stride[level]
+
                 # [B, C, H, W] -> [B, H, W, C] -> [B, M, C]
                 obj_pred = obj_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 1)
                 cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_classes)
@@ -247,13 +252,18 @@ class YOLOv7(nn.Module):
                 all_obj_preds.append(obj_pred)
                 all_cls_preds.append(cls_pred)
                 all_box_preds.append(box_pred)
+                all_reg_preds.append(reg_pred)
                 all_anchors.append(anchors)
+                all_strides.append(stride_tensor)
             
             # output dict
             outputs = {"pred_obj": all_obj_preds,        # List(Tensor) [B, M, 1]
                        "pred_cls": all_cls_preds,        # List(Tensor) [B, M, C]
                        "pred_box": all_box_preds,        # List(Tensor) [B, M, 4]
-                       "anchors": all_anchors,           # List(Tensor) [B, M, 2]
-                       'strides': self.stride}           # List(Int) [8, 16, 32]
+                       "pred_reg": all_reg_preds,        # List(Tensor) [B, M, 4]
+                       "anchors": all_anchors,           # List(Tensor) [M, 2]
+                       "strides": self.stride,           # List(Int) [8, 16, 32]
+                       "stride_tensors": all_strides     # List(Tensor) [M, 1]
+                       }
 
             return outputs 
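
The new "stride_tensors" entry gives every anchor its own stride, so downstream code can decode the raw "pred_reg" outputs without re-indexing self.stride by level (an assumption about how the loss consumes these keys). A minimal sketch of the per-level construction for one 20x20 feature map, with illustrative sizes:

import torch

fmp_h, fmp_w, stride = 20, 20, 16
anchors = torch.zeros(fmp_h * fmp_w, 2)   # [M, 2], M = 400 anchor points

# Same expression as the commit: a [M, 1] tensor filled with this level's stride.
stride_tensor = torch.ones_like(anchors[..., :1]) * stride
print(stride_tensor.shape)  # torch.Size([400, 1])
print(stride_tensor[0])     # tensor([16.])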

+ 64 - 120
models/detectors/yolov7/yolov7_backbone.py

@@ -8,7 +8,6 @@ except:
     
 
 model_urls = {
-    "elannet_nano": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_nano.pth",
     "elannet_tiny": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_tiny.pth",
     "elannet_large": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_large.pth",
     "elannet_huge": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_huge.pth",
@@ -16,86 +15,40 @@ model_urls = {
 
 
 # --------------------- ELANNet -----------------------
-# ELANNet-Nano
-class ELANNet_Nano(nn.Module):
-    def __init__(self, act_type='lrelu', norm_type='BN', depthwise=True):
-        super(ELANNet_Nano, self).__init__()
-        self.feat_dims = [64, 128, 256]
-        
-        # P1/2
-        self.layer_1 = Conv(3, 16, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        # P2/4
-        self.layer_2 = nn.Sequential(   
-            Conv(16, 32, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
-            ELANBlock(in_dim=32, out_dim=32, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        )
-        # P3/8
-        self.layer_3 = nn.Sequential(
-            nn.MaxPool2d((2, 2), 2),             
-            ELANBlock(in_dim=32, out_dim=64, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        )
-        # P4/16
-        self.layer_4 = nn.Sequential(
-            nn.MaxPool2d((2, 2), 2),             
-            ELANBlock(in_dim=64, out_dim=128, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        )
-        # P5/32
-        self.layer_5 = nn.Sequential(
-            nn.MaxPool2d((2, 2), 2),             
-            ELANBlock(in_dim=128, out_dim=256, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        )
-
-
-    def forward(self, x):
-        c1 = self.layer_1(x)
-        c2 = self.layer_2(c1)
-        c3 = self.layer_3(c2)
-        c4 = self.layer_4(c3)
-        c5 = self.layer_5(c4)
-
-        outputs = [c3, c4, c5]
-
-        return outputs
-
-
-# ELANNet-Tiny
+## ELANNet-Tiny
 class ELANNet_Tiny(nn.Module):
     """
     ELAN-Net of YOLOv7-Tiny.
     """
     def __init__(self, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANNet_Tiny, self).__init__()
-        self.feat_dims = [128, 256, 512]
+        # -------------- Basic parameters --------------
+        self.feat_dims = [32, 64, 128, 256, 512]
+        self.squeeze_ratios = [0.5, 0.5, 0.5, 0.5]  # Stage-1 -> Stage-4
+        self.branch_depths = [1, 1, 1, 1]            # Stage-1 -> Stage-4
         
-        # P1/2
-        self.layer_1 = Conv(3, 32, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        # P2/4
+        # -------------- Network parameters --------------
+        ## P1/2
+        self.layer_1 = Conv(3, self.feat_dims[0], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        ## P2/4: Stage-1
         self.layer_2 = nn.Sequential(   
-            Conv(32, 64, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
-            ELANBlock(in_dim=64, out_dim=64, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            Conv(self.feat_dims[0], self.feat_dims[1], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
+            ELANBlock(self.feat_dims[1], self.feat_dims[1], self.squeeze_ratios[0], self.branch_depths[0], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P3/8
+        ## P3/8: Stage-2
         self.layer_3 = nn.Sequential(
             nn.MaxPool2d((2, 2), 2),             
-            ELANBlock(in_dim=64, out_dim=128, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            ELANBlock(self.feat_dims[1], self.feat_dims[2], self.squeeze_ratios[1], self.branch_depths[1], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P4/16
+        ## P4/16: Stage-3
         self.layer_4 = nn.Sequential(
             nn.MaxPool2d((2, 2), 2),             
-            ELANBlock(in_dim=128, out_dim=256, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            ELANBlock(self.feat_dims[2], self.feat_dims[3], self.squeeze_ratios[2], self.branch_depths[2], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P5/32
+        ## P5/32: Stage-4
         self.layer_5 = nn.Sequential(
             nn.MaxPool2d((2, 2), 2),             
-            ELANBlock(in_dim=256, out_dim=512, expand_ratio=0.5, depth=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            ELANBlock(self.feat_dims[3], self.feat_dims[4], self.squeeze_ratios[3], self.branch_depths[3], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
 
 
@@ -110,42 +63,41 @@ class ELANNet_Tiny(nn.Module):
 
         return outputs
 
-
 ## ELANNet-Large
 class ELANNet_Lagre(nn.Module):
     def __init__(self, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANNet_Lagre, self).__init__()
-        self.feat_dims = [512, 1024, 1024]
-        
-        # P1/2
+        # -------------------- Basic parameters --------------------
+        self.feat_dims = [32, 64, 128, 256, 512, 1024, 1024]
+        self.squeeze_ratios = [0.5, 0.5, 0.5, 0.25] # Stage-1 -> Stage-4
+        self.branch_depths = [2, 2, 2, 2]            # Stage-1 -> Stage-4
+
+        # -------------------- Network parameters --------------------
+        ## P1/2
         self.layer_1 = nn.Sequential(
-            Conv(3, 32, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise),      
-            Conv(32, 64, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            Conv(64, 64, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            Conv(3, self.feat_dims[0], k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise),      
+            Conv(self.feat_dims[0], self.feat_dims[1], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            Conv(self.feat_dims[1], self.feat_dims[1], k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P2/4
+        ## P2/4: Stage-1
         self.layer_2 = nn.Sequential(   
-            Conv(64, 128, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
-            ELANBlock(in_dim=128, out_dim=256, expand_ratio=0.5, depth=2,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            Conv(self.feat_dims[1], self.feat_dims[2], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
+            ELANBlock(self.feat_dims[2], self.feat_dims[3], self.squeeze_ratios[0], self.branch_depths[0], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P3/8
+        ## P3/8: Stage-2
         self.layer_3 = nn.Sequential(
-            DownSample(in_dim=256, out_dim=256, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=256, out_dim=512, expand_ratio=0.5, depth=2,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            DownSample(self.feat_dims[3], self.feat_dims[3], act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(self.feat_dims[3], self.feat_dims[4], self.squeeze_ratios[1], self.branch_depths[1], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P4/16
+        ## P4/16: Stage-3
         self.layer_4 = nn.Sequential(
-            DownSample(in_dim=512, out_dim=512, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=512, out_dim=1024, expand_ratio=0.5, depth=2,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            DownSample(self.feat_dims[4], self.feat_dims[4], act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(self.feat_dims[4], self.feat_dims[5], self.squeeze_ratios[2], self.branch_depths[2], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P5/32
+        ## P5/32: Stage-4
         self.layer_5 = nn.Sequential(
-            DownSample(in_dim=1024, out_dim=1024, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=1024, out_dim=1024, expand_ratio=0.25, depth=2,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            DownSample(self.feat_dims[5], self.feat_dims[5], act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(self.feat_dims[5], self.feat_dims[6], self.squeeze_ratios[3], self.branch_depths[3], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
 
 
@@ -160,42 +112,41 @@ class ELANNet_Lagre(nn.Module):
 
         return outputs
 
-
 ## ELANNet-Huge
 class ELANNet_Huge(nn.Module):
     def __init__(self, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANNet_Huge, self).__init__()
-        self.feat_dims = [640, 1280, 1280]
-        
-        # P1/2
+        # -------------------- Basic parameters --------------------
+        self.feat_dims = [40, 80, 160, 320, 640, 1280, 1280]
+        self.squeeze_ratios = [0.5, 0.5, 0.5, 0.25] # Stage-1 -> Stage-4
+        self.branch_depths = [3, 3, 3, 3]            # Stage-1 -> Stage-4
+
+        # -------------------- Network parameters --------------------
+        ## P1/2
         self.layer_1 = nn.Sequential(
-            Conv(3, 40, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            Conv(40, 80, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            Conv(80, 80, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            Conv(3, self.feat_dims[0], k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise),      
+            Conv(self.feat_dims[0], self.feat_dims[1], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            Conv(self.feat_dims[1], self.feat_dims[1], k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P2/4
+        ## P2/4: Stage-1
         self.layer_2 = nn.Sequential(   
-            Conv(80, 160, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=160, out_dim=320, expand_ratio=0.5, depth=3,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            Conv(self.feat_dims[1], self.feat_dims[2], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise),             
+            ELANBlock(self.feat_dims[2], self.feat_dims[3], self.squeeze_ratios[0], self.branch_depths[0], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P3/8
+        ## P3/8: Stage-2
         self.layer_3 = nn.Sequential(
-            DownSample(in_dim=320, out_dim=320, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=320, out_dim=640, expand_ratio=0.5, depth=3,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            DownSample(self.feat_dims[3], self.feat_dims[3], act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(self.feat_dims[3], self.feat_dims[4], self.squeeze_ratios[1], self.branch_depths[1], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P4/16
+        ## P4/16: Stage-3
         self.layer_4 = nn.Sequential(
-            DownSample(in_dim=640, out_dim=640, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=640, out_dim=1280, expand_ratio=0.5, depth=3,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            DownSample(self.feat_dims[4], self.feat_dims[4], act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(self.feat_dims[4], self.feat_dims[5], self.squeeze_ratios[2], self.branch_depths[2], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
-        # P5/32
+        ## P5/32: Stage-4
         self.layer_5 = nn.Sequential(
-            DownSample(in_dim=1280, out_dim=1280, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
-            ELANBlock(in_dim=1280, out_dim=1280, expand_ratio=0.25, depth=3,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            DownSample(self.feat_dims[5], self.feat_dims[5], act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            ELANBlock(self.feat_dims[5], self.feat_dims[6], self.squeeze_ratios[3], self.branch_depths[3], act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
 
 
@@ -212,11 +163,8 @@ class ELANNet_Huge(nn.Module):
 
 
 # --------------------- Functions -----------------------
+## build backbone
 def build_backbone(cfg, pretrained=False): 
-    """Constructs a ELANNet model.
-    Args:
-        pretrained (bool): If True, returns a model pre-trained on ImageNet
-    """
     # build backbone
     if cfg['backbone'] == 'elannet_huge':
         backbone = ELANNet_Huge(cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
@@ -224,10 +172,8 @@ def build_backbone(cfg, pretrained=False):
         backbone = ELANNet_Lagre(cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
     elif cfg['backbone'] == 'elannet_tiny':
         backbone = ELANNet_Tiny(cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
-    elif cfg['backbone'] == 'elannet_nano':
-        backbone = ELANNet_Nano(cfg['bk_act'], cfg['bk_norm'], cfg['bk_dpw'])
     # pyramid feat dims
-    feat_dims = backbone.feat_dims
+    feat_dims = backbone.feat_dims[-3:]
 
     # load imagenet pretrained weight
     if pretrained:
@@ -262,13 +208,11 @@ if __name__ == '__main__':
     import time
     from thop import profile
     cfg = {
-        'pretrained': False,
+        'pretrained': True,
         'backbone': 'elannet_huge',
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
-        'p6_feat': False,
-        'p7_feat': False,
     }
     model, feats = build_backbone(cfg)
     x = torch.randn(1, 3, 224, 224)
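
With feat_dims now listing every stage width, build_backbone slices off the last three entries for the pyramid levels the FPN consumes. A quick check of that slice for ELANNet-Large:

# Per this commit, ELANNet-Large records all stage widths...
feat_dims = [32, 64, 128, 256, 512, 1024, 1024]

# ...and build_backbone keeps only C3/C4/C5 for the FPN.
fpn_in_dims = feat_dims[-3:]
print(fpn_in_dims)  # [512, 1024, 1024]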

+ 10 - 16
models/detectors/yolov7/yolov7_basic.py

@@ -11,13 +11,11 @@ class SiLU(nn.Module):
     def forward(x):
         return x * torch.sigmoid(x)
 
-
 def get_conv2d(c1, c2, k, p, s, d, g, bias=False):
     conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias)
 
     return conv
 
-
 def get_activation(act_type=None):
     if act_type == 'relu':
         return nn.ReLU(inplace=True)
@@ -28,14 +26,12 @@ def get_activation(act_type=None):
     elif act_type == 'silu':
         return nn.SiLU(inplace=True)
 
-
 def get_norm(norm_type, dim):
     if norm_type == 'BN':
         return nn.BatchNorm2d(dim)
     elif norm_type == 'GN':
         return nn.GroupNorm(num_groups=32, num_channels=dim)
 
-
 ## Basic conv layer
 class Conv(nn.Module):
     def __init__(self, 
@@ -82,18 +78,18 @@ class Conv(nn.Module):
 # ---------------------------- YOLOv7 Modules ----------------------------
 ## ELAN-Block proposed by YOLOv7
 class ELANBlock(nn.Module):
-    def __init__(self, in_dim, out_dim, expand_ratio=0.5, depth=2.0, act_type='silu', norm_type='BN', depthwise=False):
+    def __init__(self, in_dim, out_dim, squeeze_ratio=0.5, branch_depth: int = 2, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANBlock, self).__init__()
-        inter_dim = int(in_dim * expand_ratio)
+        inter_dim = int(in_dim * squeeze_ratio)
         self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv3 = nn.Sequential(*[
             Conv(inter_dim, inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-            for _ in range(round(depth))
+            for _ in range(round(branch_depth))
         ])
         self.cv4 = nn.Sequential(*[
             Conv(inter_dim, inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-            for _ in range(round(depth))
+            for _ in range(round(branch_depth))
         ])
 
         self.out = Conv(inter_dim*4, out_dim, k=1, act_type=act_type, norm_type=norm_type)
@@ -109,26 +105,25 @@ class ELANBlock(nn.Module):
 
         return out
 
-
 ## PaFPN's ELAN-Block proposed by YOLOv7
 class ELANBlockFPN(nn.Module):
-    def __init__(self, in_dim, out_dim, expand_ratio=0.5, nbranch=4, depth=1, act_type='silu', norm_type='BN', depthwise=False):
+    def __init__(self, in_dim, out_dim, squeeze_ratio=0.5, branch_width: int = 4, branch_depth: int = 1, act_type='silu', norm_type='BN', depthwise=False):
         super(ELANBlockFPN, self).__init__()
         # Basic parameters
-        inter_dim = int(in_dim * expand_ratio)
-        inter_dim2 = int(inter_dim * expand_ratio) 
+        inter_dim = int(in_dim * squeeze_ratio)
+        inter_dim2 = int(inter_dim * squeeze_ratio) 
         # Network structure
         self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv3 = nn.ModuleList()
-        for idx in range(round(nbranch)):
+        for idx in range(round(branch_width)):
             if idx == 0:
                 cvs = [Conv(inter_dim, inter_dim2, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)]
             else:
                 cvs = [Conv(inter_dim2, inter_dim2, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)]
             # deeper
-            if round(depth) > 1:
-                for _ in range(1, round(depth)):
+            if round(branch_depth) > 1:
+                for _ in range(1, round(branch_depth)):
                     cvs.append(Conv(inter_dim2, inter_dim2, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise))
                 self.cv3.append(nn.Sequential(*cvs))
             else:
@@ -149,7 +144,6 @@ class ELANBlockFPN(nn.Module):
 
         return out
 
-
 ## DownSample Block proposed by YOLOv7
 class DownSample(nn.Module):
     def __init__(self, in_dim, out_dim, act_type='silu', norm_type='BN', depthwise=False):
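
Under the renamed parameters, ELANBlock's channel bookkeeping is unchanged: two 1x1 squeezes to inter_dim, two serial branches of branch_depth 3x3 convs each, then a 1x1 fuse over the four concatenated inter_dim tensors. A sketch of the arithmetic with illustrative values:

in_dim, squeeze_ratio, branch_depth = 256, 0.5, 2

inter_dim = int(in_dim * squeeze_ratio)  # 128: width of cv1/cv2 and both branches
concat_dim = inter_dim * 4               # 512: cv1 + cv2 + cv3 + cv4 outputs, fed to self.out
num_branch_convs = 2 * branch_depth      # 4: total 3x3 convs across the two branches

print(inter_dim, concat_dim, num_branch_convs)  # 128 512 4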

+ 60 - 58
models/detectors/yolov7/yolov7_pafpn.py

@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from .yolov7_basic import Conv, ELANBlockFPN, DownSample, RepConv
+from .yolov7_basic import Conv, ELANBlockFPN, DownSample
 
 
 # PaFPN-ELAN (YOLOv7's)
@@ -9,81 +9,83 @@ class Yolov7PaFPN(nn.Module):
     def __init__(self, 
                  in_dims=[512, 1024, 512],
                  out_dim=None,
-                 width=1.0,
-                 depth=1.0,
-                 nbranch=4.0,
+                 channel_width : float = 1.0,
+                 branch_width  : int   = 4,
+                 branch_depth  : int   = 1,
                  act_type='silu',
                  norm_type='BN',
                  depthwise=False):
         super(Yolov7PaFPN, self).__init__()
-        self.in_dims = in_dims
-        c3, c4, c5 = in_dims
+        # ----------------------------- Basic parameters -----------------------------
+        self.fpn_dims = in_dims
+        self.channel_width = channel_width
+        self.branch_width = branch_width
+        self.branch_depth = branch_depth
+        c3, c4, c5 = self.fpn_dims
 
-        # top dwon
+        # ----------------------------- Top-down FPN -----------------------------
         ## P5 -> P4
-        self.reduce_layer_1 = Conv(c5, round(256*width), k=1, norm_type=norm_type, act_type=act_type)
-        self.reduce_layer_2 = Conv(c4, round(256*width), k=1, norm_type=norm_type, act_type=act_type)
-        self.top_down_layer_1 = ELANBlockFPN(in_dim=round(256*width) + round(256*width),
-                                             out_dim=round(256*width),
-                                             expand_ratio=0.5,
-                                             nbranch=nbranch,
-                                             depth=depth,
+        self.reduce_layer_1 = Conv(c5, round(256*channel_width), k=1, norm_type=norm_type, act_type=act_type)
+        self.reduce_layer_2 = Conv(c4, round(256*channel_width), k=1, norm_type=norm_type, act_type=act_type)
+        self.top_down_layer_1 = ELANBlockFPN(in_dim=round(256*channel_width) + round(256*channel_width),
+                                             out_dim=round(256*channel_width),
+                                             squeeze_ratio=0.5,
+                                             branch_width=branch_width,
+                                             branch_depth=branch_depth,
                                              act_type=act_type,
                                              norm_type=norm_type,
                                              depthwise=depthwise
                                              )
-        # P4 -> P3
-        self.reduce_layer_3 = Conv(round(256*width), round(128*width), k=1, norm_type=norm_type, act_type=act_type)
-        self.reduce_layer_4 = Conv(c3, round(128*width), k=1, norm_type=norm_type, act_type=act_type)
-        self.top_down_layer_2 = ELANBlockFPN(in_dim=round(128*width) + round(128*width),
-                                             out_dim=round(128*width),
-                                             expand_ratio=0.5,
-                                             nbranch=nbranch,
-                                             depth=depth,
+        ## P4 -> P3
+        self.reduce_layer_3 = Conv(round(256*channel_width), round(128*channel_width), k=1, norm_type=norm_type, act_type=act_type)
+        self.reduce_layer_4 = Conv(c3, round(128*channel_width), k=1, norm_type=norm_type, act_type=act_type)
+        self.top_down_layer_2 = ELANBlockFPN(in_dim=round(128*channel_width) + round(128*channel_width),
+                                             out_dim=round(128*channel_width),
+                                             squeeze_ratio=0.5,
+                                             branch_width=branch_width,
+                                             branch_depth=branch_depth,
                                              act_type=act_type,
                                              norm_type=norm_type,
                                              depthwise=depthwise
                                              )
-
-        # bottom up
-        # P3 -> P4
-        self.downsample_layer_1 = DownSample(in_dim=round(128*width), out_dim=round(256*width),
-                                             act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.bottom_up_layer_1 = ELANBlockFPN(in_dim=round(256*width) + round(256*width),
-                                              out_dim=round(256*width),
-                                              expand_ratio=0.5,
-                                              nbranch=nbranch,
-                                              depth=depth,
+        # ----------------------------- Bottom-up FPN -----------------------------
+        ## P3 -> P4
+        self.downsample_layer_1 = DownSample(round(128*channel_width), round(256*channel_width), act_type, norm_type, depthwise)
+        self.bottom_up_layer_1 = ELANBlockFPN(in_dim=round(256*channel_width) + round(256*channel_width),
+                                              out_dim=round(256*channel_width),
+                                              squeeze_ratio=0.5,
+                                              branch_width=branch_width,
+                                              branch_depth=branch_depth,
                                               act_type=act_type,
                                               norm_type=norm_type,
                                               depthwise=depthwise
                                               )
-        # P4 -> P5
-        self.downsample_layer_2 = DownSample(in_dim=round(256*width), out_dim=round(512*width),
-                                             act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.bottom_up_layer_2 = ELANBlockFPN(in_dim=round(512*width) + c5,
-                                              out_dim=round(512*width),
-                                              expand_ratio=0.5,
-                                              nbranch=nbranch,
-                                              depth=depth,
+        ## P4 -> P5
+        self.downsample_layer_2 = DownSample(round(256*channel_width), round(512*channel_width), act_type, norm_type, depthwise)
+        self.bottom_up_layer_2 = ELANBlockFPN(in_dim=round(512*channel_width) + c5,
+                                              out_dim=round(512*channel_width),
+                                              squeeze_ratio=0.5,
+                                              branch_width=branch_width,
+                                              branch_depth=branch_depth,
                                               act_type=act_type,
                                               norm_type=norm_type,
                                               depthwise=depthwise
                                               )
-        self.head_conv_1 = Conv(round(128*width), round(256*width), k=3, s=1, p=1, act_type=act_type, norm_type=norm_type)
-        self.head_conv_2 = Conv(round(256*width), round(512*width), k=3, s=1, p=1, act_type=act_type, norm_type=norm_type)
-        self.head_conv_3 = Conv(round(512*width), round(1024*width), k=3, s=1, p=1, act_type=act_type, norm_type=norm_type)
-        # output proj layers
+        # ----------------------------- Output Proj -----------------------------
+        ## Head convs
+        self.head_conv_1 = Conv(round(128*channel_width), round(256*channel_width), k=3, s=1, p=1, act_type=act_type, norm_type=norm_type)
+        self.head_conv_2 = Conv(round(256*channel_width), round(512*channel_width), k=3, s=1, p=1, act_type=act_type, norm_type=norm_type)
+        self.head_conv_3 = Conv(round(512*channel_width), round(1024*channel_width), k=3, s=1, p=1, act_type=act_type, norm_type=norm_type)
+        ## Output projs
         if out_dim is not None:
             self.out_layers = nn.ModuleList([
-                Conv(in_dim, out_dim, k=1,
-                     norm_type=norm_type, act_type=act_type)
-                     for in_dim in [round(256*width), round(512*width), round(1024*width)]
-                     ])
+                Conv(in_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type)
+                for in_dim in [round(256*channel_width), round(512*channel_width), round(1024*channel_width)]
+                ])
             self.out_dim = [out_dim] * 3
         else:
             self.out_layers = None
-            self.out_dim = [round(256*width), round(512*width), round(1024*width)]
+            self.out_dim = [round(256*channel_width), round(512*channel_width), round(1024*channel_width)]
 
 
     def forward(self, features):
@@ -130,15 +132,15 @@ def build_fpn(cfg, in_dims, out_dim=None):
     model = cfg['fpn']
     # build pafpn
     if model == 'yolov7_pafpn':
-        fpn_net = Yolov7PaFPN(in_dims=in_dims,
-                             out_dim=out_dim,
-                             width=cfg['width'],
-                             depth=cfg['depth'],
-                             nbranch=cfg['nbranch'],
-                             act_type=cfg['fpn_act'],
-                             norm_type=cfg['fpn_norm'],
-                             depthwise=cfg['fpn_depthwise']
-                             )
+        fpn_net = Yolov7PaFPN(in_dims       = in_dims,
+                              out_dim       = out_dim,
+                              channel_width = cfg['channel_width'],
+                              branch_width  = cfg['branch_width'],
+                              branch_depth  = cfg['branch_depth'],
+                              act_type      = cfg['fpn_act'],
+                              norm_type     = cfg['fpn_norm'],
+                              depthwise     = cfg['fpn_depthwise']
+                              )
 
 
     return fpn_net
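
Callers must now supply the renamed keys; a config still carrying the old 'width' / 'depth' / 'nbranch' entries would raise a KeyError here. A usage sketch, assuming the repository root is on PYTHONPATH so the package imports resolve:

from models.detectors.yolov7.yolov7_pafpn import build_fpn

cfg = {
    'fpn': 'yolov7_pafpn',
    'channel_width': 1.0,
    'branch_width': 4,
    'branch_depth': 1,
    'fpn_act': 'silu',
    'fpn_norm': 'BN',
    'fpn_depthwise': False,
}

# Mirrors the call site in yolov7.py: out_dim = round(256 * channel_width).
fpn = build_fpn(cfg, in_dims=[512, 1024, 1024], out_dim=round(256 * cfg['channel_width']))
print(fpn.out_dim)  # [256, 256, 256] when an explicit out_dim is given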

+ 2 - 0
test.py

@@ -65,6 +65,8 @@ def parse_args():
                         help='mosaic augmentation.')
     parser.add_argument('--mixup', default=None, type=float,
                         help='mixup augmentation.')
+    parser.add_argument('--load_cache', action='store_true', default=False,
+                        help='load data into memory.')
 
     return parser.parse_args()