Browse Source

redesign basic module of RTCDet-v2

yjh0410 2 years ago
parent
commit
d231ed4d2f

+ 6 - 5
config/model_config/rtcdet_v2_config.py

@@ -5,7 +5,7 @@ rtcdet_v2_cfg = {
     'rtcdet_v2_l':{
         # ---------------- Model config ----------------
         ## Backbone
-        'backbone': 'fasternet',
+        'backbone': 'elannet',
         'pretrained': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
@@ -15,9 +15,9 @@ rtcdet_v2_cfg = {
         'stride': [8, 16, 32],  # P3, P4, P5
         'max_stride': 32,
         ## Neck: SPP
-        'neck': 'mixed_spp',
-        'neck_expand_ratio': 2.0,
-        'pooling_size': [5, 9, 13],
+        'neck': 'sppf',
+        'neck_expand_ratio': 0.5,
+        'pooling_size': 5,
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
@@ -25,7 +25,8 @@ rtcdet_v2_cfg = {
         'fpn': 'rtcdet_pafpn',
         'fpn_reduce_layer': 'conv',
         'fpn_downsample_layer': 'conv',
-        'fpn_core_block': 'faster_block',
+        'fpn_core_block': 'elan_block',
+        'fpn_squeeze_ratio': 0.25,
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,

+ 13 - 13
models/detectors/rtcdet_v2/rtcdet_v2_backbone.py

@@ -1,10 +1,9 @@
 import torch
 import torch.nn as nn
 try:
-    from .rtcdet_v2_basic import Conv, CSPFasterStage, DSBlock
+    from .rtcdet_v2_basic import Conv, ELANStage, DSBlock
 except:
-    from rtcdet_v2_basic import Conv, CSPFasterStage, DSBlock
-
+    from rtcdet_v2_basic import Conv, ELANStage, DSBlock
 
 
 model_urls = {
@@ -18,19 +17,20 @@ model_urls = {
 
 
 # ---------------------------- Backbones ----------------------------
-# Modified FasterNet
-class FasterConvNet(nn.Module):
+## ELANNet
+class ELANNet(nn.Module):
     def __init__(self, width=1.0, depth=1.0, act_type='silu', norm_type='BN', depthwise=False):
-        super(FasterConvNet, self).__init__()
+        super(ELANNet, self).__init__()
         # ------------------ Basic parameters ------------------
         ## scale factor
         self.width = width
         self.depth = depth
+        self.squeeze_ratio = [0.5, 0.25, 0.25, 0.25]
         ## pyramid feats
         self.base_dims = [64, 128, 256, 512, 1024]
         self.feat_dims = [round(dim * width) for dim in self.base_dims]
         ## block depth
-        self.base_blocks = [3, 9, 9, 3]
+        self.base_blocks = [3, 6, 6, 3]
         self.feat_blocks = [round(nblock * depth) for nblock in self.base_blocks]
         ## nonlinear
         self.act_type = act_type
@@ -45,23 +45,23 @@ class FasterConvNet(nn.Module):
         )
         ## P2/4
         self.layer_2 = nn.Sequential(   
-            Conv(self.feat_dims[0], self.feat_dims[1], k=3, p=1, s=2, act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
-            CSPFasterStage(self.feat_dims[1], self.feat_dims[1], self.feat_blocks[0], 3, True, self.act_type, self.norm_type)
+            DSBlock(self.feat_dims[0], self.feat_dims[1], act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
+            ELANStage(self.feat_dims[1], self.feat_dims[1], self.feat_blocks[0], self.squeeze_ratio[0], self.act_type, self.norm_type, self.depthwise)
         )
         ## P3/8
         self.layer_3 = nn.Sequential(
             DSBlock(self.feat_dims[1], self.feat_dims[2], act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
-            CSPFasterStage(self.feat_dims[2], self.feat_dims[2], self.feat_blocks[1], 3, True, self.act_type, self.norm_type)
+            ELANStage(self.feat_dims[2], self.feat_dims[2], self.feat_blocks[1], self.squeeze_ratio[1], self.act_type, self.norm_type, self.depthwise)
         )
         ## P4/16
         self.layer_4 = nn.Sequential(
             DSBlock(self.feat_dims[2], self.feat_dims[3], act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
-            CSPFasterStage(self.feat_dims[3], self.feat_dims[3], self.feat_blocks[2], 3, True, self.act_type, self.norm_type)
+            ELANStage(self.feat_dims[3], self.feat_dims[3], self.feat_blocks[2], self.squeeze_ratio[2], self.act_type, self.norm_type, self.depthwise)
         )
         ## P5/32
         self.layer_5 = nn.Sequential(
             DSBlock(self.feat_dims[3], self.feat_dims[4], act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
-            CSPFasterStage(self.feat_dims[4], self.feat_dims[4], self.feat_blocks[3], 5, True, self.act_type, self.norm_type)
+            ELANStage(self.feat_dims[4], self.feat_dims[4], self.feat_blocks[3], self.squeeze_ratio[3], self.act_type, self.norm_type, self.depthwise)
         )
 
 
@@ -111,7 +111,7 @@ def load_weight(model, model_name):
 ## build MCNet
 def build_backbone(cfg, pretrained=False):
     # model
-    backbone = FasterConvNet(cfg['width'], cfg['depth'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_depthwise'])
+    backbone = ELANNet(cfg['width'], cfg['depth'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_depthwise'])
 
     # check whether to load imagenet pretrained weight
     if pretrained:

+ 67 - 41
models/detectors/rtcdet_v2/rtcdet_v2_basic.py

@@ -53,22 +53,22 @@ class Conv(nn.Module):
         if depthwise:
             convs.append(get_conv2d(c1, c1, k=k, p=p, s=s, d=d, g=c1, bias=add_bias))
             # depthwise conv
-            if norm_type:
+            if norm_type is not None:
                 convs.append(get_norm(norm_type, c1))
-            if act_type:
+            if act_type is not None:
                 convs.append(get_activation(act_type))
             # pointwise conv
             convs.append(get_conv2d(c1, c2, k=1, p=0, s=1, d=d, g=1, bias=add_bias))
-            if norm_type:
+            if norm_type is not None:
                 convs.append(get_norm(norm_type, c2))
-            if act_type:
+            if act_type is not None:
                 convs.append(get_activation(act_type))
 
         else:
             convs.append(get_conv2d(c1, c2, k=k, p=p, s=s, d=d, g=1, bias=add_bias))
-            if norm_type:
+            if norm_type is not None:
                 convs.append(get_norm(norm_type, c2))
-            if act_type:
+            if act_type is not None:
                 convs.append(get_activation(act_type))
             
         self.convs = nn.Sequential(*convs)
@@ -125,33 +125,62 @@ class ChannelShuffle(nn.Module):
 
         return x
 
+## Inverse Bottleneck
+class InverseBottleneck(nn.Module):
+    def __init__(self,
+                 in_dim,
+                 out_dim,
+                 expand_ratio=2.0,
+                 shortcut=False,
+                 act_type='silu',
+                 norm_type='BN',
+                 depthwise=False):
+        super(InverseBottleneck, self).__init__()
+        # ----------- Basic Parameters -----------
+        self.in_dim = in_dim
+        self.out_dim = out_dim
+        self.expand_dim = int(in_dim * expand_ratio)           
+        # ----------- Network Parameters -----------
+        self.cv1 = Conv(in_dim, in_dim, k=3, p=1, act_type=None, norm_type=norm_type, depthwise=depthwise)
+        self.cv2 = Conv(in_dim, self.expand_dim, k=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        self.cv3 = Conv(self.expand_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        self.shortcut = shortcut and in_dim == out_dim
+
+    def forward(self, x):
+        h = self.cv3(self.cv2(self.cv1(x)))
+
+        return x + h if self.shortcut else h
+
 
 # ---------------------------- Base Modules ----------------------------
-## Faster Module
-class FasterModule(nn.Module):
-    def __init__(self, in_dim, out_dim, split_ratio=0.25, kernel_size=3, shortcut=True, act_type='silu', norm_type='BN'):
+## ELAN Block
+class ELANBlock(nn.Module):
+    def __init__(self, in_dim, out_dim, squeeze_ratio=0.25, act_type='silu', norm_type='BN', depthwise=False):
         super().__init__()
         # ----------- Basic Parameters -----------
         self.in_dim = in_dim
         self.out_dim = out_dim
-        self.split_ratio = split_ratio
-        self.expand_dim = in_dim * 2
-        self.shortcut = True if shortcut and in_dim == out_dim else False
-        self.act_type = act_type
-        self.norm_type = norm_type
+        self.inter_dim = round(in_dim * squeeze_ratio)
         # ----------- Network Parameters -----------
-        self.partial_conv = PartialConv(in_dim, in_dim, split_ratio, kernel_size, stride=1, act_type=None, norm_type=None)
-        self.expand_layer = Conv(in_dim, self.expand_dim, k=1, act_type=act_type, norm_type=norm_type)
-        self.project_layer = Conv(self.expand_dim, out_dim, k=1, act_type=None, norm_type=None)
+        self.cv1 = Conv(in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
+        self.cv2 = Conv(in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
+        self.cv3 = InverseBottleneck(self.inter_dim, self.inter_dim, expand_ratio=2, shortcut=True, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        self.cv4 = InverseBottleneck(self.inter_dim, self.inter_dim, expand_ratio=2, shortcut=True, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        # output
+        self.out_conv = Conv(self.inter_dim*4, out_dim, k=1, act_type=act_type, norm_type=norm_type)
 
     def forward(self, x):
-        h = self.project_layer(self.expand_layer(self.partial_conv(x)))
+        x1 = self.cv1(x)
+        x2 = self.cv2(x)
+        x3 = self.cv3(x2)
+        x4 = self.cv4(x3)
+        out = self.out_conv(torch.cat([x1, x2, x3, x4], dim=1))
 
-        return x + h if self.shortcut else h
+        return out
 
-## CSP-style FasterBlock
-class CSPFasterStage(nn.Module):
-    def __init__(self, in_dim, out_dim, num_blocks=1, kernel_size=3, shortcut=True, act_type='silu', norm_type='BN'):
+## ELAN Stage
+class ELANStage(nn.Module):
+    def __init__(self, in_dim, out_dim, num_blocks=1, squeeze_ratio=0.25, act_type='silu', norm_type='BN', depthwise=False):
         super().__init__()
         # -------------- Basic parameters --------------
         self.in_dim = in_dim
@@ -159,19 +188,16 @@ class CSPFasterStage(nn.Module):
         self.num_blocks = num_blocks
         self.inter_dim = in_dim // 2
         # -------------- Network parameters --------------
-        self.cv1 = Conv(in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
-        self.cv2 = Conv(in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
-        self.blocks = nn.Sequential(*[
-            FasterModule(self.inter_dim, self.inter_dim, 0.5, kernel_size, shortcut, act_type, norm_type)
-            for _ in range(self.num_blocks)])
-        self.out_proj = Conv(self.inter_dim*2, out_dim, k=1, act_type=act_type, norm_type=norm_type)
+        self.stage_blocks = nn.Sequential()
+        for i in range(self.num_blocks):
+            if i == 0:
+                self.stage_blocks.append(ELANBlock(in_dim, out_dim, squeeze_ratio, act_type, norm_type, depthwise))
+            else:
+                self.stage_blocks.append(ELANBlock(out_dim, out_dim, squeeze_ratio, act_type, norm_type, depthwise))
 
 
     def forward(self, x):
-        x1 = self.cv1(x)
-        x2 = self.blocks(self.cv2(x))
-
-        return self.out_proj(torch.cat([x1, x2], dim=1))
+        return self.stage_blocks(x)
     
 ## DownSample Block
 class DSBlock(nn.Module):
@@ -206,15 +232,15 @@ class DSBlock(nn.Module):
 # ---------------------------- FPN Modules ----------------------------
 ## build fpn's core block
 def build_fpn_block(cfg, in_dim, out_dim):
-    if cfg['fpn_core_block'] == 'faster_block':
-        layer = CSPFasterStage(in_dim      = in_dim,
-                               out_dim     = out_dim,
-                               num_blocks  = round(3 * cfg['depth']),
-                               kernel_size = 3,
-                               shortcut    = False,
-                               act_type    = cfg['fpn_act'],
-                               norm_type   = cfg['fpn_norm'],
-                               )
+    if cfg['fpn_core_block'] == 'elan_block':
+        layer = ELANStage(in_dim        = in_dim,
+                          out_dim       = out_dim,
+                          num_blocks    = round(3 * cfg['depth']),
+                          squeeze_ratio = cfg['fpn_squeeze_ratio'],
+                          act_type      = cfg['fpn_act'],
+                          norm_type     = cfg['fpn_norm'],
+                          depthwise     = cfg['fpn_depthwise']
+                          )
         
     return layer