浏览代码

modify RTCDet-v2

yjh0410 2 年之前
父节点
当前提交
5b6ebce4bb

+ 9 - 9
config/model_config/rtcdet_v2_config.py

@@ -5,12 +5,12 @@ rtcdet_v2_cfg = {
     'rtcdet_v2_n':{
         # ---------------- Model config ----------------
         ## Backbone
-        'backbone': 'mcnet',
+        'backbone': 'fasternet',
         'pretrained': True,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
-        'bk_num_heads': 4,
+        'bk_split_ratio': 0.5,
         'width': 0.25,
         'depth': 0.34,
         'stride': [8, 16, 32],  # P3, P4, P5
@@ -26,8 +26,8 @@ rtcdet_v2_cfg = {
         'fpn': 'rtcdet_pafpn',
         'fpn_reduce_layer': 'conv',
         'fpn_downsample_layer': 'conv',
-        'fpn_core_block': 'mcblock',
-        'fpn_num_heads': 4,
+        'fpn_core_block': 'faster_block',
+        'fpn_split_ratio': 0.5,
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
@@ -41,7 +41,7 @@ rtcdet_v2_cfg = {
         'reg_max': 16,
         # ---------------- Train config ----------------
         ## Input
-        'multi_scale': [0.5, 1.5],   # 320 -> 960
+        'multi_scale': [0.5, 1.25],   # 320 -> 800
         'trans_type': 'rtcdet_v2_nano',
         # ---------------- Assignment config ----------------
         ## Matcher
@@ -64,12 +64,12 @@ rtcdet_v2_cfg = {
     'rtcdet_v2_l':{
         # ---------------- Model config ----------------
         ## Backbone
-        'backbone': 'mcnet',
+        'backbone': 'fasternet',
         'pretrained': False,
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
-        'bk_num_heads': 4,
+        'bk_split_ratio': 0.5,
         'width': 1.0,
         'depth': 1.0,
         'stride': [8, 16, 32],  # P3, P4, P5
@@ -85,8 +85,8 @@ rtcdet_v2_cfg = {
         'fpn': 'rtcdet_pafpn',
         'fpn_reduce_layer': 'conv',
         'fpn_downsample_layer': 'conv',
-        'fpn_core_block': 'mcblock',
-        'fpn_num_heads': 4,
+        'fpn_core_block': 'faster_block',
+        'fpn_split_ratio': 0.5,
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,

+ 42 - 38
models/detectors/rtcdet_v2/rtcdet_v2_backbone.py

@@ -1,33 +1,39 @@
 import torch
 import torch.nn as nn
 try:
-    from .rtcdet_v2_basic import Conv, MCBlock, DSBlock
+    from .rtcdet_v2_basic import Conv, FasterBlock, DSBlock
 except:
-    from rtcdet_v2_basic import Conv, MCBlock, DSBlock
+    from rtcdet_v2_basic import Conv, FasterBlock, DSBlock
 
 
 
 model_urls = {
-    'mcnet_p': None,
-    'mcnet_n': None,
-    'mcnet_t': None,
-    'mcnet_s': None,
-    'mcnet_m': None,
-    'mcnet_l': None,
-    'mcnet_x': None,
+    'fasternet_n': None,
+    'fasternet_t': None,
+    'fasternet_s': None,
+    'fasternet_m': None,
+    'fasternet_l': None,
+    'fasternet_x': None,
 }
 
 
 # ---------------------------- Backbones ----------------------------
-class MixedConvNet(nn.Module):
-    def __init__(self, width=1.0, depth=1.0, num_heads=4, act_type='silu', norm_type='BN', depthwise=False):
-        super(MixedConvNet, self).__init__()
+# Modified FasterNet
+class FasterConvNet(nn.Module):
+    def __init__(self, width=1.0, depth=1.0, split_ratio=0.25, act_type='silu', norm_type='BN', depthwise=False):
+        super(FasterConvNet, self).__init__()
         # ------------------ Basic parameters ------------------
-        self.feat_dims_base = [64, 128, 256, 512, 1024]
-        self.nblocks_base = [3, 6, 9, 3]
-        self.feat_dims = [round(dim * width) for dim in self.feat_dims_base]
-        self.nblocks = [round(nblock * depth) for nblock in self.nblocks_base]
-        self.num_heads = num_heads
+        ## scale factor
+        self.width = width
+        self.depth = depth
+        self.split_ratio = split_ratio
+        ## pyramid feats
+        self.base_dims = [64, 128, 256, 512, 1024]
+        self.feat_dims = [round(dim * width) for dim in self.base_dims]
+        ## block depth
+        self.base_depth = [3, 9, 9, 3]
+        self.feat_depth = [round(num * depth) for num in self.base_depth]
+        ## activation / normalization / depthwise settings
         self.act_type = act_type
         self.norm_type = norm_type
         self.depthwise = depthwise
@@ -35,28 +41,28 @@ class MixedConvNet(nn.Module):
         # ------------------ Network parameters ------------------
         ## P1/2
         self.layer_1 = nn.Sequential(
-            Conv(3, self.feat_dims[0], k=3, p=1, s=2, act_type=self.act_type, norm_type=self.norm_type),
+            Conv(3, self.feat_dims[0], k=6, p=2, s=2, act_type=self.act_type, norm_type=self.norm_type),
             Conv(self.feat_dims[0], self.feat_dims[0], k=3, p=1, act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
         )
         ## P2/4
         self.layer_2 = nn.Sequential(   
             Conv(self.feat_dims[0], self.feat_dims[1], k=3, p=1, s=2, act_type=self.act_type, norm_type=self.norm_type),
-            MCBlock(self.feat_dims[1], self.feat_dims[1], self.nblocks[0], self.num_heads, True, self.act_type, self.norm_type, self.depthwise)
+            FasterBlock(self.feat_dims[1], self.feat_dims[1], self.split_ratio, self.feat_depth[0], True, self.act_type, self.norm_type)
         )
         ## P3/8
         self.layer_3 = nn.Sequential(
-            DSBlock(self.feat_dims[1], self.feat_dims[2], self.num_heads, self.act_type, self.norm_type, self.depthwise),             
-            MCBlock(self.feat_dims[2], self.feat_dims[2], self.nblocks[1], self.num_heads, True, self.act_type, self.norm_type, self.depthwise)
+            DSBlock(self.feat_dims[1], self.feat_dims[2], self.act_type, self.norm_type, self.depthwise),             
+            FasterBlock(self.feat_dims[2], self.feat_dims[2], self.split_ratio, self.feat_depth[1], True, self.act_type, self.norm_type)
         )
         ## P4/16
         self.layer_4 = nn.Sequential(
-            DSBlock(self.feat_dims[2], self.feat_dims[3], self.num_heads, self.act_type, self.norm_type, self.depthwise),             
-            MCBlock(self.feat_dims[3], self.feat_dims[3], self.nblocks[2], self.num_heads, True, self.act_type, self.norm_type, self.depthwise)
+            DSBlock(self.feat_dims[2], self.feat_dims[3], self.act_type, self.norm_type, self.depthwise),             
+            FasterBlock(self.feat_dims[3], self.feat_dims[3], self.split_ratio, self.feat_depth[2], True, self.act_type, self.norm_type)
         )
         ## P5/32
         self.layer_5 = nn.Sequential(
-            DSBlock(self.feat_dims[3], self.feat_dims[4], self.num_heads, self.act_type, self.norm_type, self.depthwise),             
-            MCBlock(self.feat_dims[4], self.feat_dims[4], self.nblocks[3], self.num_heads, True, self.act_type, self.norm_type, self.depthwise)
+            DSBlock(self.feat_dims[3], self.feat_dims[4], self.act_type, self.norm_type, self.depthwise),             
+            FasterBlock(self.feat_dims[4], self.feat_dims[4], self.split_ratio, self.feat_depth[3], True, self.act_type, self.norm_type)
         )
 
 
@@ -106,24 +112,22 @@ def load_weight(model, model_name):
 ## build FasterNet backbone
 def build_backbone(cfg, pretrained=False):
     # model
-    backbone = MixedConvNet(cfg['width'], cfg['depth'], cfg['bk_num_heads'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_depthwise'])
+    backbone = FasterConvNet(cfg['width'], cfg['depth'], cfg['bk_split_ratio'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_depthwise'])
 
     # check whether to load imagenet pretrained weight
     if pretrained:
-        if cfg['width'] == 0.25 and cfg['depth'] == 0.34 and cfg['bk_depthwise']:
-            backbone = load_weight(backbone, model_name='mcnet_p')
-        elif cfg['width'] == 0.25 and cfg['depth'] == 0.34:
-            backbone = load_weight(backbone, model_name='mcnet_n')
+        if cfg['width'] == 0.25 and cfg['depth'] == 0.34:
+            backbone = load_weight(backbone, model_name='fasternet_n')
         elif cfg['width'] == 0.375 and cfg['depth'] == 0.34:
-            backbone = load_weight(backbone, model_name='mcnet_t')
+            backbone = load_weight(backbone, model_name='fasternet_t')
         elif cfg['width'] == 0.5 and cfg['depth'] == 0.34:
-            backbone = load_weight(backbone, model_name='mcnet_s')
+            backbone = load_weight(backbone, model_name='fasternet_s')
         elif cfg['width'] == 0.75 and cfg['depth'] == 0.67:
-            backbone = load_weight(backbone, model_name='mcnet_m')
+            backbone = load_weight(backbone, model_name='fasternet_m')
         elif cfg['width'] == 1.0 and cfg['depth'] == 1.0:
-            backbone = load_weight(backbone, model_name='mcnet_l')
+            backbone = load_weight(backbone, model_name='fasternet_l')
         elif cfg['width'] == 1.25 and cfg['depth'] == 1.34:
-            backbone = load_weight(backbone, model_name='mcnet_x')
+            backbone = load_weight(backbone, model_name='fasternet_x')
     feat_dims = backbone.feat_dims[-3:]
 
     return backbone, feat_dims
@@ -139,9 +143,9 @@ if __name__ == '__main__':
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
-        'bk_num_heads': 4,
-        'width': 0.25,
-        'depth': 0.34,
+        'bk_split_ratio': 0.25,
+        'width': 1.0,
+        'depth': 1.0,
         'stride': [8, 16, 32],  # P3, P4, P5
         'max_stride': 32,
     }

+ 63 - 54
models/detectors/rtcdet_v2/rtcdet_v2_basic.py

@@ -34,6 +34,7 @@ def get_norm(norm_type, dim):
     elif norm_type == 'GN':
         return nn.GroupNorm(num_groups=32, num_channels=dim)
 
+## Basic Conv Module
 class Conv(nn.Module):
     def __init__(self, 
                  c1,                   # in channels
@@ -76,78 +77,87 @@ class Conv(nn.Module):
     def forward(self, x):
         return self.convs(x)
 
+## Partial Conv Module
+class PartialConv(nn.Module):
+    def __init__(self, in_dim, out_dim, split_ratio=0.25, kernel_size=1, stride=1, act_type=None, norm_type=None):
+        super().__init__()
+        # ----------- Basic Parameters -----------
+        assert in_dim == out_dim
+        self.in_dim = in_dim
+        self.out_dim = out_dim
+        self.split_ratio = split_ratio
+        self.split_dim = round(in_dim * split_ratio)
+        self.untouched_dim = in_dim - self.split_dim
+        self.kernel_size = kernel_size
+        self.padding = kernel_size // 2
+        self.stride = stride
+        self.act_type = act_type
+        self.norm_type = norm_type
+        # ----------- Network Parameters -----------
+        self.partial_conv = Conv(self.split_dim, self.split_dim, self.kernel_size, self.padding, self.stride, act_type=act_type, norm_type=norm_type)
+
+    def forward(self, x):
+        x1, x2 = torch.split(x, [self.split_dim, self.untouched_dim], dim=1)
+        x1 = self.partial_conv(x1)
+        x = torch.cat((x1, x2), 1)
+
+        return x
+        
 
 # ---------------------------- Base Modules ----------------------------
-## Multi-head Mixed Conv (MHMC)
-class MultiHeadMixedConv(nn.Module):
-    def __init__(self, in_dim, out_dim, num_heads=4, shortcut=False, act_type='silu', norm_type='BN', depthwise=False):
+## Faster Module
+class FasterModule(nn.Module):
+    def __init__(self, in_dim, out_dim, split_ratio=0.25, kernel_size=3, stride=1, shortcut=True, act_type='silu', norm_type='BN'):
         super().__init__()
-        # -------------- Basic parameters --------------
+        # ----------- Basic Parameters -----------
         self.in_dim = in_dim
         self.out_dim = out_dim
-        self.num_heads = num_heads
-        self.shortcut = shortcut
-        self.head_dim = in_dim // num_heads
-        # -------------- Network parameters --------------
-        ## Scale Modulation
-        self.mixed_convs = nn.ModuleList()
-        for i in range(num_heads):
-            self.mixed_convs.append(
-                Conv(self.head_dim, self.head_dim, k=2*i+1, p=i, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-            )
-        ## Out-proj
-        self.out_proj = Conv(in_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type)
-
+        self.split_ratio = split_ratio
+        self.shortcut = True if shortcut and in_dim == out_dim else False
+        self.act_type = act_type
+        self.norm_type = norm_type
+        # ----------- Network Parameters -----------
+        self.partial_conv = PartialConv(in_dim, in_dim, split_ratio, kernel_size, stride, act_type=None, norm_type=None)
+        self.expand_layer = Conv(in_dim, in_dim*2, k=1, act_type=act_type, norm_type=norm_type)
+        self.project_layer = Conv(in_dim*2, out_dim, k=1, act_type=None, norm_type=None)
 
     def forward(self, x):
-        xs = torch.chunk(x, self.num_heads, dim=1)
-        ys = [mixed_conv(x_h) for x_h, mixed_conv in zip(xs, self.mixed_convs)]
-        out = self.out_proj(torch.cat(ys, dim=1))
+        h = self.project_layer(self.expand_layer(self.partial_conv(x)))
 
-        return out + x if self.shortcut else out
+        return x + h if self.shortcut else h
 
-## Mixed Convolution Block
-class MCBlock(nn.Module):
-    def __init__(self, in_dim, out_dim, nblocks=1, num_heads=4, shortcut=False, act_type='silu', norm_type='BN', depthwise=False):
+## CSP-style FasterBlock
+class FasterBlock(nn.Module):
+    def __init__(self, in_dim, out_dim, split_ratio=0.5, num_blocks=1, shortcut=True, act_type='silu', norm_type='BN'):
         super().__init__()
         # -------------- Basic parameters --------------
         self.in_dim = in_dim
         self.out_dim = out_dim
-        self.nblocks = nblocks
-        self.num_heads = num_heads
-        self.shortcut = shortcut
+        self.split_ratio = split_ratio
+        self.num_blocks = num_blocks
         self.inter_dim = in_dim // 2
         # -------------- Network parameters --------------
-        ## branch-1
-        self.cv1 = Conv(self.in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
-        self.cv2 = Conv(self.in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
-        ## branch-2
-        self.smblocks = nn.Sequential(*[
-            MultiHeadMixedConv(self.inter_dim, self.inter_dim, self.num_heads, self.shortcut, act_type, norm_type, depthwise)
-            for _ in range(nblocks)])
-        ## out proj
+        self.cv1 = Conv(in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
+        self.cv2 = Conv(in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
+        self.blocks = nn.Sequential(*[
+            FasterModule(self.inter_dim, self.inter_dim, split_ratio, 3, 1, shortcut, act_type, norm_type)
+            for _ in range(self.num_blocks)])
         self.out_proj = Conv(self.inter_dim*2, out_dim, k=1, act_type=act_type, norm_type=norm_type)
 
 
     def forward(self, x):
-        # branch-1
         x1 = self.cv1(x)
-        # branch-2
-        x2 = self.smblocks(self.cv2(x))
-        # output
-        out = torch.cat([x1, x2], dim=1)
-        out = self.out_proj(out)
+        x2 = self.blocks(self.cv2(x))
 
-        return out
+        return self.out_proj(torch.cat([x1, x2], dim=1))
 
 ## DownSample Block
 class DSBlock(nn.Module):
-    def __init__(self, in_dim, out_dim, num_heads=4, act_type='silu', norm_type='BN', depthwise=False):
+    def __init__(self, in_dim, out_dim, act_type='silu', norm_type='BN', depthwise=False):
         super().__init__()
         self.in_dim = in_dim
         self.out_dim = out_dim
         self.inter_dim = out_dim // 2
-        self.num_heads = num_heads
         # branch-1
         self.maxpool = nn.Sequential(
             Conv(in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type),
@@ -174,16 +184,15 @@ class DSBlock(nn.Module):
 # ---------------------------- FPN Modules ----------------------------
 ## build fpn's core block
 def build_fpn_block(cfg, in_dim, out_dim):
-    if cfg['fpn_core_block'] == 'mcblock':
-        layer = MCBlock(in_dim=in_dim,
-                        out_dim=out_dim,
-                        nblocks=round(cfg['depth'] * 3),
-                        num_heads=cfg['fpn_num_heads'],
-                        shortcut=False,
-                        act_type=cfg['fpn_act'],
-                        norm_type=cfg['fpn_norm'],
-                        depthwise=cfg['fpn_depthwise']
-                        )
+    if cfg['fpn_core_block'] == 'faster_block':
+        layer = FasterBlock(in_dim      = in_dim,
+                            out_dim     = out_dim,
+                            split_ratio = cfg['fpn_split_ratio'],
+                            num_blocks  = round(3 * cfg['depth']),
+                            shortcut    = False,
+                            act_type    = cfg['fpn_act'],
+                            norm_type   = cfg['fpn_norm'],
+                            )
         
     return layer
 

+ 2 - 2
models/detectors/rtcdet_v2/rtcdet_v2_head.py

@@ -75,8 +75,8 @@ class MultiLevelHead(nn.Module):
         self.multi_level_heads = nn.ModuleList(
             [SingleLevelHead(
                 in_dims[level],
-                max(out_dim, num_classes),   # cls head dim
-                max(out_dim, 4*reg_max),     # reg head dim
+                out_dim,            # cls head dim
+                out_dim,            # reg head dim
                 cfg['num_cls_head'],
                 cfg['num_reg_head'],
                 cfg['head_act'],