yjh0410 2 years ago
Commit
a1b69fb635

+ 0 - 21
config/__init__.py

@@ -39,13 +39,6 @@ from .data_config.transform_config import (
     rtcdet_v1_medium_trans_config,
     rtcdet_v1_large_trans_config,
     rtcdet_v1_huge_trans_config,
-    # RTMDet-v2-Style
-    rtcdet_v2_pico_trans_config,
-    rtcdet_v2_nano_trans_config,
-    rtcdet_v2_small_trans_config,
-    rtcdet_v2_medium_trans_config,
-    rtcdet_v2_large_trans_config,
-    rtcdet_v2_huge_trans_config,
 )
 
 def build_trans_config(trans_config='ssd'):
@@ -98,20 +91,6 @@ def build_trans_config(trans_config='ssd'):
     elif trans_config == 'rtcdet_v1_huge':
         cfg = rtcdet_v1_huge_trans_config
 
-    # RTMDetv2-style transform 
-    elif trans_config == 'rtcdet_v2_pico':
-        cfg = rtcdet_v2_pico_trans_config
-    elif trans_config == 'rtcdet_v2_nano':
-        cfg = rtcdet_v2_nano_trans_config
-    elif trans_config == 'rtcdet_v2_small':
-        cfg = rtcdet_v2_small_trans_config
-    elif trans_config == 'rtcdet_v2_medium':
-        cfg = rtcdet_v2_medium_trans_config
-    elif trans_config == 'rtcdet_v2_large':
-        cfg = rtcdet_v2_large_trans_config
-    elif trans_config == 'rtcdet_v2_huge':
-        cfg = rtcdet_v2_huge_trans_config
-
     print('Transform Config: {} \n'.format(cfg))
 
     return cfg
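
Note: with the RTMDet-v2-style entries gone, only the remaining keys resolve to a config, and the function has no fallback branch. A minimal usage sketch, assuming the repo root is on the import path:

from config import build_trans_config

# Still resolvable: one of the v1-style keys kept above.
cfg = build_trans_config('rtcdet_v1_huge')

# A deleted key such as 'rtcdet_v2_large' now matches no branch, so cfg stays
# unbound and the trailing print()/return raises UnboundLocalError.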

+ 0 - 116
config/data_config/transform_config.py

@@ -359,119 +359,3 @@ rtcdet_v1_pico_trans_config = {
     'mixup_type': 'yolox_mixup',
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
 }
-
-
-# ----------------------- RTMDet-v2's Transform -----------------------
-rtcdet_v2_huge_trans_config = {
-    'aug_type': 'yolov5',
-    # Basic Augment
-    'degrees': 0.0,
-    'translate': 0.2,
-    'scale': [0.5, 2.0],
-    'shear': 0.0,
-    'perspective': 0.0,
-    'hsv_h': 0.015,
-    'hsv_s': 0.7,
-    'hsv_v': 0.4,
-    # Mosaic & Mixup
-    'mosaic_prob': 1.0,
-    'mixup_prob': 1.0,
-    'mosaic_type': 'yolov5_mosaic',
-    'mixup_type': 'yolov5_mixup',
-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
-}
-
-rtcdet_v2_large_trans_config = {
-    'aug_type': 'yolov5',
-    # Basic Augment
-    'degrees': 0.0,
-    'translate': 0.2,
-    'scale': [0.5, 2.0],
-    'shear': 0.0,
-    'perspective': 0.0,
-    'hsv_h': 0.015,
-    'hsv_s': 0.7,
-    'hsv_v': 0.4,
-    # Mosaic & Mixup
-    'mosaic_prob': 1.0,
-    'mixup_prob': 1.0,
-    'mosaic_type': 'yolov5_mosaic',
-    'mixup_type': 'yolov5_mixup',
-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
-}
-
-rtcdet_v2_medium_trans_config = {
-    'aug_type': 'yolov5',
-    # Basic Augment
-    'degrees': 0.0,
-    'translate': 0.2,
-    'scale': [0.5, 2.0],
-    'shear': 0.0,
-    'perspective': 0.0,
-    'hsv_h': 0.015,
-    'hsv_s': 0.7,
-    'hsv_v': 0.4,
-    # Mosaic & Mixup
-    'mosaic_prob': 1.0,
-    'mixup_prob': 1.0,
-    'mosaic_type': 'yolov5_mosaic',
-    'mixup_type': 'yolov5_mixup',
-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
-}
-
-rtcdet_v2_small_trans_config = {
-    'aug_type': 'yolov5',
-    # Basic Augment
-    'degrees': 0.0,
-    'translate': 0.2,
-    'scale': [0.5, 2.0],
-    'shear': 0.0,
-    'perspective': 0.0,
-    'hsv_h': 0.015,
-    'hsv_s': 0.7,
-    'hsv_v': 0.4,
-    # Mosaic & Mixup
-    'mosaic_prob': 1.0,
-    'mixup_prob': 1.0,
-    'mosaic_type': 'yolov5_mosaic',
-    'mixup_type': 'yolov5_mixup',
-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
-}
-
-rtcdet_v2_nano_trans_config = {
-    'aug_type': 'yolov5',
-    # Basic Augment
-    'degrees': 0.0,
-    'translate': 0.2,
-    'scale': [0.5, 1.5],
-    'shear': 0.0,
-    'perspective': 0.0,
-    'hsv_h': 0.015,
-    'hsv_s': 0.7,
-    'hsv_v': 0.4,
-    # Mosaic & Mixup
-    'mosaic_prob': 1.0,
-    'mixup_prob': 0.1,
-    'mosaic_type': 'yolov5_mosaic',
-    'mixup_type': 'yolov5_mixup',
-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
-}
-
-rtcdet_v2_pico_trans_config = {
-    'aug_type': 'yolov5',
-    # Basic Augment
-    'degrees': 0.0,
-    'translate': 0.2,
-    'scale': [0.5, 1.5],
-    'shear': 0.0,
-    'perspective': 0.0,
-    'hsv_h': 0.015,
-    'hsv_s': 0.7,
-    'hsv_v': 0.4,
-    # Mosaic & Mixup
-    'mosaic_prob': 0.5,
-    'mixup_prob': 0.0,
-    'mosaic_type': 'yolov5_mosaic',
-    'mixup_type': 'yolov5_mixup',
-    'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp
-}
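
The six deleted presets reused the key set of the surviving v1 entries and all set 'mixup_type' to 'yolov5_mixup' (the rtcdet_v1_pico entry above keeps 'yolox_mixup'); only a few values varied by model size. For reference, those values, copied from the removed dicts:

# scale / mosaic_prob / mixup_prob of the deleted rtcdet_v2_* transform presets
rtcdet_v2_aug_summary = {
    'huge':   {'scale': [0.5, 2.0], 'mosaic_prob': 1.0, 'mixup_prob': 1.0},
    'large':  {'scale': [0.5, 2.0], 'mosaic_prob': 1.0, 'mixup_prob': 1.0},
    'medium': {'scale': [0.5, 2.0], 'mosaic_prob': 1.0, 'mixup_prob': 1.0},
    'small':  {'scale': [0.5, 2.0], 'mosaic_prob': 1.0, 'mixup_prob': 1.0},
    'nano':   {'scale': [0.5, 1.5], 'mosaic_prob': 1.0, 'mixup_prob': 0.1},
    'pico':   {'scale': [0.5, 1.5], 'mosaic_prob': 0.5, 'mixup_prob': 0.0},
}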

+ 7 - 68
config/model_config/rtcdet_v2_config.py

@@ -2,65 +2,6 @@
 
 
 rtcdet_v2_cfg = {
-    'rtcdet_v2_n':{
-        # ---------------- Model config ----------------
-        ## Backbone
-        'backbone': 'fasternet',
-        'pretrained': True,
-        'bk_act': 'silu',
-        'bk_norm': 'BN',
-        'bk_depthwise': False,
-        'bk_split_ratio': 0.5,
-        'width': 0.25,
-        'depth': 0.34,
-        'stride': [8, 16, 32],  # P3, P4, P5
-        'max_stride': 32,
-        ## Neck: SPP
-        'neck': 'sppf',
-        'neck_expand_ratio': 0.5,
-        'pooling_size': 5,
-        'neck_act': 'silu',
-        'neck_norm': 'BN',
-        'neck_depthwise': False,
-        ## Neck: PaFPN
-        'fpn': 'rtcdet_pafpn',
-        'fpn_reduce_layer': 'conv',
-        'fpn_downsample_layer': 'conv',
-        'fpn_core_block': 'faster_block',
-        'fpn_split_ratio': 0.5,
-        'fpn_act': 'silu',
-        'fpn_norm': 'BN',
-        'fpn_depthwise': False,
-        ## Head
-        'head': 'decoupled_head',
-        'head_act': 'silu',
-        'head_norm': 'BN',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
-        'head_depthwise': False,
-        'reg_max': 16,
-        # ---------------- Train config ----------------
-        ## Input
-        'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'rtcdet_v2_nano',
-        # ---------------- Assignment config ----------------
-        ## Matcher
-        'matcher': {'tal': {'topk': 10,
-                            'alpha': 0.5,
-                            'beta': 6.0},
-                    'ota': {'center_sampling_radius': 2.5,
-                             'topk_candidate': 10},
-                    },
-        # ---------------- Loss config ----------------
-        ## Loss weight
-        'ema_update': False,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        'loss_dfl_weight': 1.0,
-        # ---------------- Train config ----------------
-        'trainer_type': 'rtmdet',
-    },
-
     'rtcdet_v2_l':{
         # ---------------- Model config ----------------
         ## Backbone
@@ -69,15 +10,14 @@ rtcdet_v2_cfg = {
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
-        'bk_split_ratio': 0.5,
         'width': 1.0,
         'depth': 1.0,
         'stride': [8, 16, 32],  # P3, P4, P5
         'max_stride': 32,
         ## Neck: SPP
-        'neck': 'sppf',
-        'neck_expand_ratio': 0.5,
-        'pooling_size': 5,
+        'neck': 'mixed_spp',
+        'neck_expand_ratio': 2.0,
+        'pooling_size': [5, 9, 13],
         'neck_act': 'silu',
         'neck_norm': 'BN',
         'neck_depthwise': False,
@@ -86,7 +26,6 @@ rtcdet_v2_cfg = {
         'fpn_reduce_layer': 'conv',
         'fpn_downsample_layer': 'conv',
         'fpn_core_block': 'faster_block',
-        'fpn_split_ratio': 0.5,
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
@@ -101,7 +40,7 @@ rtcdet_v2_cfg = {
         # ---------------- Train config ----------------
         ## Input
         'multi_scale': [0.5, 1.25],   # 320 -> 800
-        'trans_type': 'rtcdet_v2_large',
+        'trans_type': 'rtcdet_v1_large',
         # ---------------- Assignment config ----------------
         ## Matcher
         'matcher': {'tal': {'topk': 10,
@@ -113,9 +52,9 @@ rtcdet_v2_cfg = {
         # ---------------- Loss config ----------------
         ## Loss weight
         'ema_update': False,
-        'loss_cls_weight': 1.0,
-        'loss_box_weight': 5.0,
-        'loss_dfl_weight': 1.0,
+        'loss_cls_weight': {'tal': 0.5, 'ota': 1.0},
+        'loss_box_weight': {'tal': 7.0, 'ota': 5.0},
+        'loss_dfl_weight': {'tal': 1.5, 'ota': 1.0},
         # ---------------- Train config ----------------
         'trainer_type': 'rtmdet',
     },
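
A minimal sanity sketch of the reworked 'rtcdet_v2_l' entry, assuming the config packages import as laid out in this repo; it covers the three substantive changes: the mixed_spp neck, the switch back to the v1 transform preset, and loss weights keyed by matcher:

from config.model_config.rtcdet_v2_config import rtcdet_v2_cfg

cfg = rtcdet_v2_cfg['rtcdet_v2_l']
assert cfg['neck'] == 'mixed_spp' and cfg['pooling_size'] == [5, 9, 13]
assert cfg['trans_type'] == 'rtcdet_v1_large'          # reuses the v1 transform pipeline
assert set(cfg['loss_cls_weight']) == {'tal', 'ota'}   # weights are now indexed per matcher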

+ 6 - 6
models/detectors/rtcdet_v2/loss.py

@@ -202,9 +202,9 @@ class Criterion(object):
         loss_dfl = loss_dfl.sum() / normalizer
 
         # total loss
-        losses = self.loss_cls_weight * loss_cls + \
-                 self.loss_box_weight * loss_box + \
-                 self.loss_dfl_weight * loss_dfl
+        losses = self.loss_cls_weight['tal'] * loss_cls + \
+                 self.loss_box_weight['tal'] * loss_box + \
+                 self.loss_dfl_weight['tal'] * loss_dfl
 
         loss_dict = dict(
                 loss_cls = loss_cls,
@@ -310,9 +310,9 @@ class Criterion(object):
         loss_dfl = loss_dfl.sum() / normalizer
 
         # total loss
-        losses = self.loss_cls_weight * loss_cls + \
-                 self.loss_box_weight * loss_box + \
-                 self.loss_dfl_weight * loss_dfl
+        losses = self.loss_cls_weight['ota'] * loss_cls + \
+                 self.loss_box_weight['ota'] * loss_box + \
+                 self.loss_dfl_weight['ota'] * loss_dfl
 
         loss_dict = dict(
                 loss_cls = loss_cls,
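
Together with the config change above, the total loss now differs only in which key indexes the weight dicts; a standalone sketch of the arithmetic, with the weights copied from 'rtcdet_v2_l':

# Per-matcher loss weights from the updated rtcdet_v2_l config.
loss_cls_weight = {'tal': 0.5, 'ota': 1.0}
loss_box_weight = {'tal': 7.0, 'ota': 5.0}
loss_dfl_weight = {'tal': 1.5, 'ota': 1.0}

def weighted_total(loss_cls, loss_box, loss_dfl, matcher='tal'):
    # Mirrors the two patched branches above: the same sum, indexed by 'tal' or 'ota'.
    return (loss_cls_weight[matcher] * loss_cls
            + loss_box_weight[matcher] * loss_box
            + loss_dfl_weight[matcher] * loss_dfl)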

+ 14 - 16
models/detectors/rtcdet_v2/rtcdet_v2_backbone.py

@@ -1,9 +1,9 @@
 import torch
 import torch.nn as nn
 try:
-    from .rtcdet_v2_basic import Conv, FasterBlock, DSBlock
+    from .rtcdet_v2_basic import Conv, CSPFasterStage, DSBlock
 except:
-    from rtcdet_v2_basic import Conv, FasterBlock, DSBlock
+    from rtcdet_v2_basic import Conv, CSPFasterStage, DSBlock
 
 
 
@@ -20,19 +20,18 @@ model_urls = {
 # ---------------------------- Backbones ----------------------------
 # Modified FasterNet
 class FasterConvNet(nn.Module):
-    def __init__(self, width=1.0, depth=1.0, split_ratio=0.25, act_type='silu', norm_type='BN', depthwise=False):
+    def __init__(self, width=1.0, depth=1.0, act_type='silu', norm_type='BN', depthwise=False):
         super(FasterConvNet, self).__init__()
         # ------------------ Basic parameters ------------------
         ## scale factor
         self.width = width
         self.depth = depth
-        self.split_ratio = split_ratio
         ## pyramid feats
         self.base_dims = [64, 128, 256, 512, 1024]
         self.feat_dims = [round(dim * width) for dim in self.base_dims]
         ## block depth
-        self.base_depth = [3, 9, 9, 3]
-        self.feat_depth = [round(num * depth) for num in self.base_depth]
+        self.base_blocks = [3, 9, 9, 3]
+        self.feat_blocks = [round(nblock * depth) for nblock in self.base_blocks]
         ## nonlinear
         self.act_type = act_type
         self.norm_type = norm_type
@@ -46,23 +45,23 @@ class FasterConvNet(nn.Module):
         )
         ## P2/4
         self.layer_2 = nn.Sequential(   
-            Conv(self.feat_dims[0], self.feat_dims[1], k=3, p=1, s=2, act_type=self.act_type, norm_type=self.norm_type),
-            FasterBlock(self.feat_dims[1], self.feat_dims[1], self.split_ratio, self.feat_depth[0], True, self.act_type, self.norm_type)
+            Conv(self.feat_dims[0], self.feat_dims[1], k=3, p=1, s=2, act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
+            CSPFasterStage(self.feat_dims[1], self.feat_dims[1], self.feat_blocks[0], 3, True, self.act_type, self.norm_type)
         )
         ## P3/8
         self.layer_3 = nn.Sequential(
-            DSBlock(self.feat_dims[1], self.feat_dims[2], self.act_type, self.norm_type, self.depthwise),             
-            FasterBlock(self.feat_dims[2], self.feat_dims[2], self.split_ratio, self.feat_depth[1], True, self.act_type, self.norm_type)
+            DSBlock(self.feat_dims[1], self.feat_dims[2], act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
+            CSPFasterStage(self.feat_dims[2], self.feat_dims[2], self.feat_blocks[1], 3, True, self.act_type, self.norm_type)
         )
         ## P4/16
         self.layer_4 = nn.Sequential(
-            DSBlock(self.feat_dims[2], self.feat_dims[3], self.act_type, self.norm_type, self.depthwise),             
-            FasterBlock(self.feat_dims[3], self.feat_dims[3], self.split_ratio, self.feat_depth[2], True, self.act_type, self.norm_type)
+            DSBlock(self.feat_dims[2], self.feat_dims[3], act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
+            CSPFasterStage(self.feat_dims[3], self.feat_dims[3], self.feat_blocks[2], 3, True, self.act_type, self.norm_type)
         )
         ## P5/32
         self.layer_5 = nn.Sequential(
-            DSBlock(self.feat_dims[3], self.feat_dims[4], self.act_type, self.norm_type, self.depthwise),             
-            FasterBlock(self.feat_dims[4], self.feat_dims[4], self.split_ratio, self.feat_depth[3], True, self.act_type, self.norm_type)
+            DSBlock(self.feat_dims[3], self.feat_dims[4], act_type=self.act_type, norm_type=self.norm_type, depthwise=self.depthwise),
+            CSPFasterStage(self.feat_dims[4], self.feat_dims[4], self.feat_blocks[3], 3, True, self.act_type, self.norm_type)
         )
 
 
@@ -112,7 +111,7 @@ def load_weight(model, model_name):
 ## build MCNet
 def build_backbone(cfg, pretrained=False):
     # model
-    backbone = FasterConvNet(cfg['width'], cfg['depth'], cfg['bk_split_ratio'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_depthwise'])
+    backbone = FasterConvNet(cfg['width'], cfg['depth'], cfg['bk_act'], cfg['bk_norm'], cfg['bk_depthwise'])
 
     # check whether to load imagenet pretrained weight
     if pretrained:
@@ -143,7 +142,6 @@ if __name__ == '__main__':
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_depthwise': False,
-        'bk_split_ratio': 0.25,
         'width': 1.0,
         'depth': 1.0,
         'stride': [8, 16, 32],  # P3, P4, P5
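
A construction sketch for the updated backbone signature, assuming the package path resolves as in the relative imports above; split_ratio is no longer an argument, and each stage now pairs a downsampling layer with a CSPFasterStage of 3/9/9/3 blocks scaled by depth:

from models.detectors.rtcdet_v2.rtcdet_v2_backbone import FasterConvNet

backbone = FasterConvNet(width=1.0, depth=1.0, act_type='silu', norm_type='BN', depthwise=False)
print(sum(p.numel() for p in backbone.parameters()))   # quick check that the module builds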

+ 41 - 20
models/detectors/rtcdet_v2/rtcdet_v2_basic.py

@@ -102,24 +102,47 @@ class PartialConv(nn.Module):
         x = torch.cat((x1, x2), 1)
 
         return x
-        
+
+## Channel Shuffle
+class ChannelShuffle(nn.Module):
+    def __init__(self, groups=1) -> None:
+        super().__init__()
+        self.groups = groups
+
+    def forward(self, x):
+        # type: (torch.Tensor, int) -> torch.Tensor
+        batchsize, num_channels, height, width = x.data.size()
+        channels_per_group = num_channels // self.groups
+
+        # reshape
+        x = x.view(batchsize, self.groups,
+                channels_per_group, height, width)
+
+        x = torch.transpose(x, 1, 2).contiguous()
+
+        # flatten
+        x = x.view(batchsize, -1, height, width)
+
+        return x
+
 
 # ---------------------------- Base Modules ----------------------------
 ## Faster Module
 class FasterModule(nn.Module):
-    def __init__(self, in_dim, out_dim, split_ratio=0.25, kernel_size=3, stride=1, shortcut=True, act_type='silu', norm_type='BN'):
+    def __init__(self, in_dim, out_dim, split_ratio=0.25, kernel_size=3, shortcut=True, act_type='silu', norm_type='BN'):
         super().__init__()
         # ----------- Basic Parameters -----------
         self.in_dim = in_dim
         self.out_dim = out_dim
         self.split_ratio = split_ratio
+        self.expand_dim = in_dim * 2
         self.shortcut = True if shortcut and in_dim == out_dim else False
         self.act_type = act_type
         self.norm_type = norm_type
         # ----------- Network Parameters -----------
-        self.partial_conv = PartialConv(in_dim, in_dim, split_ratio, kernel_size, stride, act_type=None, norm_type=None)
-        self.expand_layer = Conv(in_dim, in_dim*2, k=1, act_type=act_type, norm_type=norm_type)
-        self.project_layer = Conv(in_dim*2, out_dim, k=1, act_type=None, norm_type=None)
+        self.partial_conv = PartialConv(in_dim, in_dim, split_ratio, kernel_size, stride=1, act_type=None, norm_type=None)
+        self.expand_layer = Conv(in_dim, self.expand_dim, k=1, act_type=act_type, norm_type=norm_type)
+        self.project_layer = Conv(self.expand_dim, out_dim, k=1, act_type=None, norm_type=None)
 
     def forward(self, x):
         h = self.project_layer(self.expand_layer(self.partial_conv(x)))
@@ -127,20 +150,19 @@ class FasterModule(nn.Module):
         return x + h if self.shortcut else h
 
 ## CSP-style FasterBlock
-class FasterBlock(nn.Module):
-    def __init__(self, in_dim, out_dim, split_ratio=0.5, num_blocks=1, shortcut=True, act_type='silu', norm_type='BN'):
+class CSPFasterStage(nn.Module):
+    def __init__(self, in_dim, out_dim, num_blocks=1, kernel_size=3, shortcut=True, act_type='silu', norm_type='BN'):
         super().__init__()
         # -------------- Basic parameters --------------
         self.in_dim = in_dim
         self.out_dim = out_dim
-        self.split_ratio = split_ratio
         self.num_blocks = num_blocks
         self.inter_dim = in_dim // 2
         # -------------- Network parameters --------------
         self.cv1 = Conv(in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv2 = Conv(in_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.blocks = nn.Sequential(*[
-            FasterModule(self.inter_dim, self.inter_dim, split_ratio, 3, 1, shortcut, act_type, norm_type)
+            FasterModule(self.inter_dim, self.inter_dim, 0.5, kernel_size, shortcut, act_type, norm_type)
             for _ in range(self.num_blocks)])
         self.out_proj = Conv(self.inter_dim*2, out_dim, k=1, act_type=act_type, norm_type=norm_type)
 
@@ -150,7 +172,7 @@ class FasterBlock(nn.Module):
         x2 = self.blocks(self.cv2(x))
 
         return self.out_proj(torch.cat([x1, x2], dim=1))
-
+    
 ## DownSample Block
 class DSBlock(nn.Module):
     def __init__(self, in_dim, out_dim, act_type='silu', norm_type='BN', depthwise=False):
@@ -185,14 +207,14 @@ class DSBlock(nn.Module):
 ## build fpn's core block
 def build_fpn_block(cfg, in_dim, out_dim):
     if cfg['fpn_core_block'] == 'faster_block':
-        layer = FasterBlock(in_dim      = in_dim,
-                            out_dim     = out_dim,
-                            split_ratio = cfg['fpn_split_ratio'],
-                            num_blocks  = round(3 * cfg['depth']),
-                            shortcut    = False,
-                            act_type    = cfg['fpn_act'],
-                            norm_type   = cfg['fpn_norm'],
-                            )
+        layer = CSPFasterStage(in_dim      = in_dim,
+                               out_dim     = out_dim,
+                               num_blocks  = round(3 * cfg['depth']),
+                               kernel_size = 3,
+                               shortcut    = False,
+                               act_type    = cfg['fpn_act'],
+                               norm_type   = cfg['fpn_norm'],
+                               )
         
     return layer
 
@@ -212,7 +234,6 @@ def build_downsample_layer(cfg, in_dim, out_dim):
         assert in_dim == out_dim
         layer = nn.MaxPool2d((2, 2), stride=2)
     elif cfg['fpn_downsample_layer'] == 'dsblock':
-        layer = DSBlock(in_dim, out_dim, num_heads=cfg['fpn_num_heads'],
-                        act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm'], depthwise=cfg['fpn_depthwise'])
+        layer = DSBlock(in_dim, out_dim, cfg['fpn_act'], cfg['fpn_norm'], cfg['fpn_depthwise'])
         
     return layer
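
A hedged shape sketch for the renamed CSP stage (FasterBlock -> CSPFasterStage): split_ratio is dropped from the stage signature (it is fixed to 0.5 inside each FasterModule) and kernel_size is exposed instead:

import torch
from models.detectors.rtcdet_v2.rtcdet_v2_basic import CSPFasterStage

stage = CSPFasterStage(in_dim=256, out_dim=256, num_blocks=3, kernel_size=3, shortcut=True)
y = stage(torch.randn(1, 256, 40, 40))
# Expected (1, 256, 40, 40): cv1/cv2 halve the channels, the concat restores them, and the
# 1x1 out_proj maps to out_dim; the spatial size assumes the stride-1 partial conv pads to keep H and W.
print(y.shape)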

+ 27 - 0
models/detectors/rtcdet_v2/rtcdet_v2_neck.py

@@ -24,6 +24,31 @@ class SPPF(nn.Module):
 
         return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
 
+# Mixed Spatial Pyramid Pooling
+class MixedSPP(nn.Module):
+    def __init__(self, cfg, in_dim, out_dim, expand_ratio=2.0):
+        super().__init__()
+        # ------------- Basic parameters -------------
+        self.in_dim = in_dim
+        self.out_dim = out_dim
+        self.expand_dim = round(in_dim * expand_ratio)
+        self.num_maxpools = len(cfg['pooling_size']) + 1
+        # ------------- Network parameters -------------
+        self.input_proj = Conv(in_dim, self.expand_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm'])
+        self.maxpools = nn.ModuleList()
+        for ksize in cfg['pooling_size']:
+            self.maxpools.append(nn.MaxPool2d(kernel_size=ksize, stride=1, padding=ksize// 2))
+        self.output_proj = Conv(self.expand_dim, out_dim, k=1, act_type=cfg['neck_act'], norm_type=cfg['neck_norm'])
+
+    def forward(self, x):
+        x = self.input_proj(x)
+        x_chunks = torch.chunk(x, self.num_maxpools, dim=1)
+        out = [x_chunks[0]]
+        for x_chunk, maxpool in zip(x_chunks[1:], self.maxpools):
+            out.append(maxpool(x_chunk))
+        out = torch.cat(out, dim=1)
+
+        return self.output_proj(out)
 
 # SPPF block with CSP module
 class SPPFBlockCSP(nn.Module):
@@ -67,6 +92,8 @@ def build_neck(cfg, in_dim, out_dim):
         neck = SPPF(cfg, in_dim, out_dim, cfg['neck_expand_ratio'])
     elif model == 'csp_sppf':
         neck = SPPFBlockCSP(cfg, in_dim, out_dim, cfg['neck_expand_ratio'])
+    elif model == 'mixed_spp':
+        neck = MixedSPP(cfg, in_dim, out_dim, cfg['neck_expand_ratio'])
 
     return neck
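
A hedged shape sketch of the new MixedSPP neck: the input is expanded by expand_ratio, chunked into len(pooling_size) + 1 groups, the first group passes through untouched while each remaining group gets its own max-pooling kernel, and a 1x1 convolution projects the concatenation back to out_dim. Unlike SPPF, the pools act on disjoint channel groups rather than sequentially on the whole tensor:

import torch
from models.detectors.rtcdet_v2.rtcdet_v2_neck import MixedSPP

cfg = {'pooling_size': [5, 9, 13], 'neck_act': 'silu', 'neck_norm': 'BN'}
neck = MixedSPP(cfg, in_dim=512, out_dim=512, expand_ratio=2.0)
y = neck(torch.randn(1, 512, 20, 20))
print(y.shape)   # torch.Size([1, 512, 20, 20]); stride-1 pools with k//2 padding keep H and W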
         

+ 1 - 1
models/detectors/rtcdet_v2/rtcdet_v2_pafpn.py

@@ -13,7 +13,7 @@ class RTCDetPaFPN(nn.Module):
         self.in_dims = in_dims
         self.fpn_dims = in_dims
         
-        # --------------------------- Top-down FPN---------------------------
+        # --------------------------- Top-down FPN ---------------------------
         ## P5 -> P4
         self.reduce_layer_1 = build_reduce_layer(cfg, self.fpn_dims[2], self.fpn_dims[2]//2)
         self.top_down_layer_1 = build_fpn_block(cfg, self.fpn_dims[1] + self.fpn_dims[2]//2, self.fpn_dims[1])