yjh0410 2 years ago
parent
commit
fd8e9e5e0c

+ 24 - 3
config/__init__.py

@@ -1,4 +1,4 @@
-# ------------------------ Dataset Config ------------------------
+# ------------------ Dataset Config ------------------
 from .data_config.dataset_config import dataset_cfg
 
 
@@ -14,7 +14,7 @@ def build_dataset_config(args):
     return cfg
 
 
-# ------------------ Transform Config ----------------------
+# ------------------ Transform Config ------------------
 from .data_config.transform_config import (
     # YOLOv5-Style
     yolov5_pico_trans_config,
@@ -39,6 +39,13 @@ from .data_config.transform_config import (
     rtcdet_v1_medium_trans_config,
     rtcdet_v1_large_trans_config,
     rtcdet_v1_huge_trans_config,
+    # RTMDet-v2-Style
+    rtcdet_v2_pico_trans_config,
+    rtcdet_v2_nano_trans_config,
+    rtcdet_v2_small_trans_config,
+    rtcdet_v2_medium_trans_config,
+    rtcdet_v2_large_trans_config,
+    rtcdet_v2_huge_trans_config,
 )
 
 def build_trans_config(trans_config='ssd'):
@@ -91,12 +98,26 @@ def build_trans_config(trans_config='ssd'):
     elif trans_config == 'rtcdet_v1_huge':
         cfg = rtcdet_v1_huge_trans_config
 
+    # RTMDet-v2-Style transform
+    elif trans_config == 'rtcdet_v2_pico':
+        cfg = rtcdet_v2_pico_trans_config
+    elif trans_config == 'rtcdet_v2_nano':
+        cfg = rtcdet_v2_nano_trans_config
+    elif trans_config == 'rtcdet_v2_small':
+        cfg = rtcdet_v2_small_trans_config
+    elif trans_config == 'rtcdet_v2_medium':
+        cfg = rtcdet_v2_medium_trans_config
+    elif trans_config == 'rtcdet_v2_large':
+        cfg = rtcdet_v2_large_trans_config
+    elif trans_config == 'rtcdet_v2_huge':
+        cfg = rtcdet_v2_huge_trans_config
+
     print('Transform Config: {} \n'.format(cfg))
 
     return cfg
 
 
-# ------------------ Model Config ----------------------
+# ------------------ Model Config ------------------
 ## YOLO series
 from .model_config.yolov1_config import yolov1_cfg
 from .model_config.yolov2_config import yolov2_cfg
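
For reference, a minimal usage sketch of the extended factory (assuming the repository root is on the import path); the key strings mirror the new elif branches added above.

    from config import build_trans_config

    # Any of the new 'rtcdet_v2_*' keys resolves to its RTMDet-v2-style transform dict.
    trans_cfg = build_trans_config('rtcdet_v2_small')
    print(trans_cfg['mosaic_prob'], trans_cfg['mixup_prob'])  # 1.0 1.0 for the small config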

+ 109 - 0
config/data_config/transform_config.py

@@ -359,3 +359,112 @@ rtcdet_v1_pico_trans_config = {
     'mixup_type': 'yolox_mixup',
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
 }
+
+# ----------------------- RTMDet-v2's Transform -----------------------
+rtcdet_v2_huge_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.2,
+    'scale': [0.5, 2.0],
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 1.0,
+    'mixup_prob': 1.0,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup'
+}
+
+rtcdet_v2_large_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.2,
+    'scale': [0.5, 2.0],
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 1.0,
+    'mixup_prob': 1.0,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup'
+}
+
+rtcdet_v2_medium_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.2,
+    'scale': [0.5, 2.0],
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 1.0,
+    'mixup_prob': 1.0,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup'
+}
+
+rtcdet_v2_small_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.2,
+    'scale': [0.5, 2.0],
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 1.0,
+    'mixup_prob': 1.0,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup'
+}
+
+rtcdet_v2_nano_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.2,
+    'scale': [0.5, 2.0],
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 1.0,
+    'mixup_prob': 0.1,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup'
+}
+
+rtcdet_v2_pico_trans_config = {
+    'aug_type': 'yolov5',
+    # Basic Augment
+    'degrees': 0.0,
+    'translate': 0.2,
+    'scale': [0.5, 2.0],
+    'shear': 0.0,
+    'perspective': 0.0,
+    'hsv_h': 0.015,
+    'hsv_s': 0.7,
+    'hsv_v': 0.4,
+    # Mosaic & Mixup
+    'mosaic_prob': 0.5,
+    'mixup_prob': 0.0,
+    'mosaic_type': 'yolov5_mosaic',
+    'mixup_type': 'yolov5_mixup'
+}
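
The six new dicts are identical except for the Mosaic/MixUp probabilities (1.0/1.0 for small through huge, 1.0/0.1 for nano, 0.5/0.0 for pico). A sketch of how they could be derived from one base dict; `_make_v2_trans_config` is a hypothetical helper for illustration only, the repository keeps six explicit dicts.

    # Hypothetical helper (illustration only); the repository defines six explicit dicts.
    _RTCDET_V2_TRANS_BASE = {
        'aug_type': 'yolov5',
        # Basic Augment
        'degrees': 0.0, 'translate': 0.2, 'scale': [0.5, 2.0],
        'shear': 0.0, 'perspective': 0.0,
        'hsv_h': 0.015, 'hsv_s': 0.7, 'hsv_v': 0.4,
        # Mosaic & Mixup
        'mosaic_type': 'yolov5_mosaic', 'mixup_type': 'yolov5_mixup',
    }

    def _make_v2_trans_config(mosaic_prob, mixup_prob):
        return {**_RTCDET_V2_TRANS_BASE, 'mosaic_prob': mosaic_prob, 'mixup_prob': mixup_prob}

    # e.g. _make_v2_trans_config(0.5, 0.0) reproduces rtcdet_v2_pico_trans_config above.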

+ 2 - 2
config/model_config/rtcdet_v2_config.py

@@ -5,7 +5,7 @@ rtcdet_v2_cfg = {
     'rtcdet_v2_l':{
         # ---------------- Model config ----------------
         ## Backbone
-        'backbone': 'elannet',
+        'backbone': 'elannet_v2',
         'pretrained': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
@@ -15,7 +15,7 @@ rtcdet_v2_cfg = {
         'stride': [8, 16, 32],  # P3, P4, P5
         'max_stride': 32,
         ## Neck: SPP
-        'neck': 'sppf',
+        'neck': 'csp_sppf',
         'neck_expand_ratio': 0.5,
         'pooling_size': 5,
         'neck_act': 'silu',

+ 2 - 2
models/detectors/rtcdet_v1/rtcdet_v1_pafpn.py

@@ -51,8 +51,8 @@ class RTCDetPaFPN(nn.Module):
             self.out_dim = self.fpn_dims
 
 
-    def forward(self, features):
-        fpn_feats = [layer(feat) for feat, layer in zip(features, self.input_projs)]
+    def forward(self, fpn_feats):
+        fpn_feats = [layer(feat) for feat, layer in zip(fpn_feats, self.input_projs)]
         c3, c4, c5 = fpn_feats
 
         # Top down

+ 31 - 34
models/detectors/rtcdet_v2/rtcdet_v2_backbone.py

@@ -1,37 +1,34 @@
 import torch
 import torch.nn as nn
 try:
-    from .rtcdet_v2_basic import Conv, ELANStage, DSBlock
+    from .rtcdet_v2_basic import Conv, ELAN_Stage, DSBlock
 except:
-    from rtcdet_v2_basic import Conv, ELANStage, DSBlock
+    from rtcdet_v2_basic import Conv, ELAN_Stage, DSBlock
 
 
 model_urls = {
-    'fasternet_n': None,
-    'fasternet_t': None,
-    'fasternet_s': None,
-    'fasternet_m': None,
-    'fasternet_l': None,
-    'fasternet_x': None,
+    'elannet_v2_n': None,
+    'elannet_v2_t': None,
+    'elannet_v2_s': None,
+    'elannet_v2_m': None,
+    'elannet_v2_l': None,
+    'elannet_v2_x': None,
 }
 
 
 # ---------------------------- Backbones ----------------------------
-## Modified FasterNet
-class ELANNet(nn.Module):
+## Modified ELANNet-v2
+class ELANNetv2(nn.Module):
     def __init__(self, width=1.0, depth=1.0, act_type='silu', norm_type='BN', depthwise=False):
-        super(ELANNet, self).__init__()
+        super(ELANNetv2, self).__init__()
         # ------------------ Basic parameters ------------------
         ## scale factor
         self.width = width
         self.depth = depth
-        self.squeeze_ratio = [0.5, 0.25, 0.25, 0.25]
+        self.squeeze_ratio = [0.5, 0.5, 0.375, 0.25]
         ## pyramid feats
-        self.base_dims = [64, 128, 256, 512, 1024]
-        self.feat_dims = [round(dim * width) for dim in self.base_dims]
-        ## block depth
-        self.base_blocks = [3, 6, 6, 3]
-        self.feat_blocks = [round(nblock * depth) for nblock in self.base_blocks]
+        self.feat_dims = [round(dim * width) for dim in [64, 128, 256, 512, 1024]]
+        self.branch_depths = [round(dep * depth) for dep in [3, 6, 6, 3]]
         ## nonlinear
         self.act_type = act_type
         self.norm_type = norm_type
@@ -45,23 +42,23 @@ class ELANNet(nn.Module):
         )
         ## P2/4
         self.layer_2 = nn.Sequential(   
-            DSBlock(self.feat_dims[0], self.feat_dims[1], act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
-            ELANStage(self.feat_dims[1], self.feat_dims[1], self.feat_blocks[0], self.squeeze_ratio[0], self.act_type, self.norm_type, self.depthwise)
+            DSBlock(self.feat_dims[0], self.feat_dims[1], self.act_type, self.norm_type, self.depthwise),
+            ELAN_Stage(self.feat_dims[1], self.feat_dims[1], self.squeeze_ratio[0], self.branch_depths[0], True, self.act_type, self.norm_type, self.depthwise)
         )
         ## P3/8
         self.layer_3 = nn.Sequential(
-            DSBlock(self.feat_dims[1], self.feat_dims[2], act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
-            ELANStage(self.feat_dims[2], self.feat_dims[2], self.feat_blocks[1], self.squeeze_ratio[1], self.act_type, self.norm_type, self.depthwise)
+            DSBlock(self.feat_dims[1], self.feat_dims[2], self.act_type, self.norm_type, self.depthwise),
+            ELAN_Stage(self.feat_dims[2], self.feat_dims[2], self.squeeze_ratio[1], self.branch_depths[1], True, self.act_type, self.norm_type, self.depthwise)
         )
         ## P4/16
         self.layer_4 = nn.Sequential(
-            DSBlock(self.feat_dims[2], self.feat_dims[3], act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
-            ELANStage(self.feat_dims[3], self.feat_dims[3], self.feat_blocks[2], self.squeeze_ratio[2], self.act_type, self.norm_type, self.depthwise)
+            DSBlock(self.feat_dims[2], self.feat_dims[3], self.act_type, self.norm_type, self.depthwise),
+            ELAN_Stage(self.feat_dims[3], self.feat_dims[3], self.squeeze_ratio[2], self.branch_depths[2], True, self.act_type, self.norm_type, self.depthwise)
         )
         ## P5/32
         self.layer_5 = nn.Sequential(
-            DSBlock(self.feat_dims[3], self.feat_dims[4], act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
-            ELANStage(self.feat_dims[4], self.feat_dims[4], self.feat_blocks[3], self.squeeze_ratio[3], self.act_type, self.norm_type, self.depthwise)
+            DSBlock(self.feat_dims[3], self.feat_dims[4], self.act_type, self.norm_type, self.depthwise),
+            ELAN_Stage(self.feat_dims[4], self.feat_dims[4], self.squeeze_ratio[3], self.branch_depths[3], True, self.act_type, self.norm_type, self.depthwise)
         )
 
 
@@ -111,22 +108,22 @@ def load_weight(model, model_name):
 ## build MCNet
 def build_backbone(cfg, pretrained=False):
     # model
-    backbone = ELANNet(cfg['width'], cfg['depth'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_depthwise'])
+    backbone = ELANNetv2(cfg['width'], cfg['depth'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_depthwise'])
 
     # check whether to load imagenet pretrained weight
     if pretrained:
         if cfg['width'] == 0.25 and cfg['depth'] == 0.34:
-            backbone = load_weight(backbone, model_name='fasternet_n')
+            backbone = load_weight(backbone, model_name='elannet_v2_n')
         elif cfg['width'] == 0.375 and cfg['depth'] == 0.34:
-            backbone = load_weight(backbone, model_name='fasternet_t')
+            backbone = load_weight(backbone, model_name='elannet_v2_t')
         elif cfg['width'] == 0.5 and cfg['depth'] == 0.34:
-            backbone = load_weight(backbone, model_name='fasternet_s')
+            backbone = load_weight(backbone, model_name='elannet_v2_s')
         elif cfg['width'] == 0.75 and cfg['depth'] == 0.67:
-            backbone = load_weight(backbone, model_name='fasternet_m')
+            backbone = load_weight(backbone, model_name='elannet_v2_m')
         elif cfg['width'] == 1.0 and cfg['depth'] == 1.0:
-            backbone = load_weight(backbone, model_name='fasternet_l')
+            backbone = load_weight(backbone, model_name='elannet_v2_l')
         elif cfg['width'] == 1.25 and cfg['depth'] == 1.34:
-            backbone = load_weight(backbone, model_name='fasternet_x')
+            backbone = load_weight(backbone, model_name='elannet_v2_x')
     feat_dims = backbone.feat_dims[-3:]
 
     return backbone, feat_dims
@@ -137,8 +134,8 @@ if __name__ == '__main__':
     from thop import profile
     cfg = {
         ## Backbone
-        'backbone': 'mcnet',
-        'pretrained': True,
+        'backbone': 'elannet_v2',
+        'pretrained': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
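
A quick sanity-check sketch for the renamed backbone (run from models/detectors/rtcdet_v2/ so the bare-import fallback applies); the width/depth pair matches the 'elannet_v2_l' branch of build_backbone.

    from rtcdet_v2_backbone import build_backbone

    # 'l' scale per build_backbone's pretrained branches: width=1.0, depth=1.0.
    cfg = {'width': 1.0, 'depth': 1.0, 'bk_act': 'silu', 'bk_norm': 'BN', 'bk_depthwise': False}
    backbone, feat_dims = build_backbone(cfg, pretrained=False)  # all model_urls are still None
    print(feat_dims)  # [256, 512, 1024]: the last three pyramid dims at width=1.0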

+ 48 - 37
models/detectors/rtcdet_v2/rtcdet_v2_basic.py

@@ -151,22 +151,55 @@ class InverseBottleneck(nn.Module):
 
         return x + h if self.shortcut else h
 
+## YOLO-style BottleNeck
+class YoloBottleneck(nn.Module):
+    def __init__(self,
+                 in_dim,
+                 out_dim,
+                 expand_ratio=0.5,
+                 shortcut=False,
+                 act_type='silu',
+                 norm_type='BN',
+                 depthwise=False):
+        super(YoloBottleneck, self).__init__()
+        # ------------------ Basic parameters ------------------
+        self.in_dim = in_dim
+        self.out_dim = out_dim
+        self.inter_dim = int(out_dim * expand_ratio)
+        self.shortcut = shortcut and in_dim == out_dim
+        # ------------------ Network parameters ------------------
+        self.cv1 = Conv(in_dim, self.inter_dim, k=1, norm_type=norm_type, act_type=act_type)
+        self.cv2 = Conv(self.inter_dim, out_dim, k=3, p=1, norm_type=norm_type, act_type=act_type, depthwise=depthwise)
+
+    def forward(self, x):
+        h = self.cv2(self.cv1(x))
+
+        return x + h if self.shortcut else h
+
 
 # ---------------------------- Base Modules ----------------------------
-## ELAN Block
-class ELANBlock(nn.Module):
-    def __init__(self, in_dim, out_dim, squeeze_ratio=0.25, act_type='silu', norm_type='BN', depthwise=False):
+## ELAN Stage of Backbone
+class ELAN_Stage(nn.Module):
+    def __init__(self, in_dim, out_dim, squeeze_ratio: float = 0.5, branch_depth: int = 1, shortcut=False, act_type='silu', norm_type='BN', depthwise=False):
         super().__init__()
         # ----------- Basic Parameters -----------
         self.in_dim = in_dim
         self.out_dim = out_dim
         self.inter_dim = round(in_dim * squeeze_ratio)
+        self.squeeze_ratio = squeeze_ratio
+        self.branch_depth = branch_depth
         # ----------- Network Parameters -----------
         self.cv1 = Conv(in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv2 = Conv(in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
-        self.cv3 = InverseBottleneck(self.inter_dim, self.inter_dim, expand_ratio=2, shortcut=True, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.cv4 = InverseBottleneck(self.inter_dim, self.inter_dim, expand_ratio=2, shortcut=True, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        # output
+        self.cv3 = nn.Sequential(*[
+            YoloBottleneck(self.inter_dim, self.inter_dim, 1.0, shortcut, act_type, norm_type, depthwise)
+            for _ in range(branch_depth)
+        ])
+        self.cv4 = nn.Sequential(*[
+            YoloBottleneck(self.inter_dim, self.inter_dim, 1.0, shortcut, act_type, norm_type, depthwise)
+            for _ in range(branch_depth)
+        ])
+        ## output
         self.out_conv = Conv(self.inter_dim*4, out_dim, k=1, act_type=act_type, norm_type=norm_type)
 
     def forward(self, x):
@@ -177,27 +210,6 @@ class ELANBlock(nn.Module):
         out = self.out_conv(torch.cat([x1, x2, x3, x4], dim=1))
 
         return out
-
-## ELAN Stage
-class ELANStage(nn.Module):
-    def __init__(self, in_dim, out_dim, num_blocks=1, squeeze_ratio=0.25, act_type='silu', norm_type='BN', depthwise=False):
-        super().__init__()
-        # -------------- Basic parameters --------------
-        self.in_dim = in_dim
-        self.out_dim = out_dim
-        self.num_blocks = num_blocks
-        self.inter_dim = in_dim // 2
-        # -------------- Network parameters --------------
-        self.stage_blocks = nn.Sequential()
-        for i in range(self.num_blocks):
-            if i == 0:
-                self.stage_blocks.append(ELANBlock(in_dim, out_dim, squeeze_ratio, act_type, norm_type, depthwise))
-            else:
-                self.stage_blocks.append(ELANBlock(out_dim, out_dim, squeeze_ratio, act_type, norm_type, depthwise))
-
-
-    def forward(self, x):
-        return self.stage_blocks(x)
     
 ## DownSample Block
 class DSBlock(nn.Module):
@@ -233,14 +245,15 @@ class DSBlock(nn.Module):
 ## build fpn's core block
 def build_fpn_block(cfg, in_dim, out_dim):
     if cfg['fpn_core_block'] == 'elan_block':
-        layer = ELANStage(in_dim        = in_dim,
-                          out_dim       = out_dim,
-                          num_blocks    = round(3 * cfg['depth']),
-                          squeeze_ratio = cfg['fpn_squeeze_ratio'],
-                          act_type      = cfg['fpn_act'],
-                          norm_type     = cfg['fpn_norm'],
-                          depthwise     = cfg['fpn_depthwise']
-                          )
+        layer = ELAN_Stage(in_dim        = in_dim,
+                           out_dim       = out_dim,
+                           squeeze_ratio = cfg['fpn_squeeze_ratio'],
+                           branch_depth  = round(3 * cfg['depth']),
+                           shortcut      = False,
+                           act_type      = cfg['fpn_act'],
+                           norm_type     = cfg['fpn_norm'],
+                           depthwise     = cfg['fpn_depthwise']
+                           )
         
     return layer
 
@@ -259,7 +272,5 @@ def build_downsample_layer(cfg, in_dim, out_dim):
     elif cfg['fpn_downsample_layer'] == 'maxpool':
         assert in_dim == out_dim
         layer = nn.MaxPool2d((2, 2), stride=2)
-    elif cfg['fpn_downsample_layer'] == 'dsblock':
-        layer = DSBlock(in_dim, out_dim, cfg['fpn_act'], cfg['fpn_norm'], cfg['fpn_depthwise'])
         
     return layer
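
A channel-arithmetic sketch of the reworked ELAN_Stage, mirroring the backbone's P3 stage at width=1.0/depth=1.0 (run from models/detectors/rtcdet_v2/, assuming rtcdet_v2_basic has no relative imports of its own): both 1x1 branches squeeze to round(in_dim * squeeze_ratio), each deep branch stacks branch_depth YoloBottlenecks, and out_conv fuses the four inter_dim chunks back to out_dim.

    import torch
    from rtcdet_v2_basic import ELAN_Stage

    # P3 stage at the 'l' scale: in/out 256, squeeze 0.5, branch depth 6, residual bottlenecks.
    stage = ELAN_Stage(256, 256, squeeze_ratio=0.5, branch_depth=6, shortcut=True)
    x = torch.randn(1, 256, 80, 80)
    print(stage.inter_dim)   # round(256 * 0.5) = 128, so out_conv sees 4 * 128 = 512 channels
    print(stage(x).shape)    # torch.Size([1, 256, 80, 80])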

+ 36 - 2
models/detectors/rtcdet_v2/rtcdet_v2_head.py

@@ -1,7 +1,10 @@
 import torch
 import torch.nn as nn
 
-from .rtcdet_v2_basic import Conv
+try:
+    from .rtcdet_v2_basic import Conv
+except:
+    from rtcdet_v2_basic import Conv
 
 
 # Single-level Head
@@ -113,4 +116,35 @@ def build_det_head(cfg, in_dim, out_dim, num_classes=80, reg_max=16, num_levels=
     if cfg['head'] == 'decoupled_head':
         head = MultiLevelHead(cfg, in_dim, out_dim, num_classes, reg_max, num_levels) 
 
-    return head
+    return head
+
+
+if __name__ == '__main__':
+    import time
+    from thop import profile
+    cfg = {
+        'head': 'decoupled_head',
+        'num_cls_head': 2,
+        'num_reg_head': 2,
+        'head_act': 'silu',
+        'head_norm': 'BN',
+        'head_depthwise': False,
+        'reg_max': 16,
+    }
+    fpn_dims = [256, 256, 256]
+    out_dim = 256
+    # Head-1
+    model = build_det_head(cfg, fpn_dims, out_dim, num_classes=80, reg_max=16, num_levels=3)
+    fpn_feats = [torch.randn(1, fpn_dims[0], 80, 80), torch.randn(1, fpn_dims[1], 40, 40), torch.randn(1, fpn_dims[2], 20, 20)]
+    t0 = time.time()
+    outputs = model(fpn_feats)
+    t1 = time.time()
+    print('Time: ', t1 - t0)
+    # for out in outputs:
+    #     print(out.shape)
+
+    print('==============================')
+    flops, params = profile(model, inputs=(fpn_feats, ), verbose=False)
+    print('==============================')
+    print('Head-1: GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
+    print('Head-1: Params : {:.2f} M'.format(params / 1e6))

+ 2 - 38
models/detectors/rtcdet_v2/rtcdet_v2_neck.py

@@ -24,32 +24,6 @@ class SPPF(nn.Module):
 
         return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
 
-# Mixed Spatial Pyramid Pooling
-class MixedSPP(nn.Module):
-    def __init__(self, cfg, in_dim, out_dim, expand_ratio=2.0):
-        super().__init__()
-        # ------------- Basic parameters -------------
-        self.in_dim = in_dim
-        self.out_dim = out_dim
-        self.expand_dim = round(in_dim * expand_ratio)
-        self.num_maxpools = len(cfg['pooling_size']) + 1
-        # ------------- Network parameters -------------
-        self.input_proj = Conv(in_dim, self.expand_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm'])
-        self.maxpools = nn.ModuleList()
-        for ksize in cfg['pooling_size']:
-            self.maxpools.append(nn.MaxPool2d(kernel_size=ksize, stride=1, padding=ksize// 2))
-        self.output_proj = Conv(self.expand_dim, out_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm'])
-
-    def forward(self, x):
-        x = self.input_proj(x)
-        x_chunks = torch.chunk(x, self.num_maxpools, dim=1)
-        out = [x_chunks[0]]
-        for x_chunk, maxpool in zip(x_chunks[1:], self.maxpools):
-            out.append(maxpool(x_chunk))
-        out = torch.cat(out, dim=1)
-
-        return self.output_proj(out)
-
 # SPPF block with CSP module
 class SPPFBlockCSP(nn.Module):
     """
@@ -61,22 +35,14 @@ class SPPFBlockCSP(nn.Module):
         self.out_dim = out_dim
         self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm'])
         self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm'])
-        self.m = nn.Sequential(
-            Conv(inter_dim, inter_dim, k=3, p=1, 
-                 act_type=cfg['neck_act'], norm_type=cfg['neck_norm'], 
-                 depthwise=cfg['neck_depthwise']),
-            SPPF(cfg, inter_dim, inter_dim, expand_ratio=1.0),
-            Conv(inter_dim, inter_dim, k=3, p=1, 
-                 act_type=cfg['neck_act'], norm_type=cfg['neck_norm'], 
-                 depthwise=cfg['neck_depthwise'])
-        )
+        self.spp = SPPF(cfg, inter_dim, inter_dim, expand_ratio=1.0)
         self.cv3 = Conv(inter_dim * 2, self.out_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm'])
 
         
     def forward(self, x):
         x1 = self.cv1(x)
         x2 = self.cv2(x)
-        x3 = self.m(x2)
+        x3 = self.spp(x2)
         y = self.cv3(torch.cat([x1, x3], dim=1))
 
         return y
@@ -92,8 +58,6 @@ def build_neck(cfg, in_dim, out_dim):
         neck = SPPF(cfg, in_dim, out_dim, cfg['neck_expand_ratio'])
     elif model == 'csp_sppf':
         neck = SPPFBlockCSP(cfg, in_dim, out_dim, cfg['neck_expand_ratio'])
-    elif model == 'mixed_spp':
-        neck = MixedSPP(cfg, in_dim, out_dim, cfg['neck_expand_ratio'])
 
     return neck
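
With MixedSPP removed and the CSP branch reduced to a single SPPF, the 'csp_sppf' key now set in rtcdet_v2_config maps directly to SPPFBlockCSP. A hedged sketch, assuming the package is importable from the repo root and that the 'rtcdet_v2_l' config also carries a 'neck_norm' entry (the truncated hunk above does not show it):

    import torch
    from models.detectors.rtcdet_v2.rtcdet_v2_neck import build_neck

    # Keys mirror the 'rtcdet_v2_l' model config; 'neck_norm': 'BN' is an assumption.
    cfg = {'neck': 'csp_sppf', 'neck_expand_ratio': 0.5, 'pooling_size': 5,
           'neck_act': 'silu', 'neck_norm': 'BN'}
    neck = build_neck(cfg, in_dim=1024, out_dim=1024)
    print(neck(torch.randn(1, 1024, 20, 20)).shape)  # torch.Size([1, 1024, 20, 20])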
         

+ 41 - 6
models/detectors/rtcdet_v2/rtcdet_v2_pafpn.py

@@ -2,7 +2,10 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
-from .rtcdet_v2_basic import (Conv, build_reduce_layer, build_downsample_layer, build_fpn_block)
+try:
+    from .rtcdet_v2_basic import (Conv, build_reduce_layer, build_downsample_layer, build_fpn_block)
+except:
+    from rtcdet_v2_basic import (Conv, build_reduce_layer, build_downsample_layer, build_fpn_block)
 
 
 # RTCDet-Style PaFPN
@@ -34,10 +37,8 @@ class RTCDetPaFPN(nn.Module):
         # --------------------------- Output proj ---------------------------
         if out_dim is not None:
             self.out_layers = nn.ModuleList([
-                Conv(in_dim, out_dim, k=1,
-                     act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm'])
-                     for in_dim in self.fpn_dims
-                     ])
+                Conv(in_dim, out_dim, k=1, act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm'])
+                for in_dim in self.fpn_dims])
             self.out_dim = [out_dim] * 3
         else:
             self.out_layers = None
@@ -87,4 +88,38 @@ def build_fpn(cfg, in_dims, out_dim=None):
     if model == 'rtcdet_pafpn':
         fpn_net = RTCDetPaFPN(cfg, in_dims, out_dim)
 
-    return fpn_net
+    return fpn_net
+
+
+if __name__ == '__main__':
+    import time
+    from thop import profile
+    cfg = {
+        'width': 1.0,
+        'depth': 1.0,
+        'fpn': 'rtcdet_pafpn',
+        'fpn_reduce_layer': 'conv',
+        'fpn_downsample_layer': 'conv',
+        'fpn_core_block': 'elan_block',
+        'fpn_squeeze_ratio': 0.25,
+        'fpn_act': 'silu',
+        'fpn_norm': 'BN',
+        'fpn_depthwise': False,
+    }
+    fpn_dims = [256, 512, 1024]
+    out_dim = 256
+    # Head-1
+    model = build_fpn(cfg, fpn_dims, out_dim)
+    fpn_feats = [torch.randn(1, fpn_dims[0], 80, 80), torch.randn(1, fpn_dims[1], 40, 40), torch.randn(1, fpn_dims[2], 20, 20)]
+    t0 = time.time()
+    outputs = model(fpn_feats)
+    t1 = time.time()
+    print('Time: ', t1 - t0)
+    # for out in outputs:
+    #     print(out.shape)
+
+    print('==============================')
+    flops, params = profile(model, inputs=(fpn_feats, ), verbose=False)
+    print('==============================')
+    print('FPN: GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
+    print('FPN: Params : {:.2f} M'.format(params / 1e6))