yjh0410 2 years ago
parent
commit
7f3c58c6c6

+ 2 - 3
config/model_config/lodet_config.py

@@ -20,10 +20,9 @@ lodet_cfg = {
     'neck_depthwise': True,
     ## Neck: PaFPN
     'fpn': 'lodet_pafpn',
-    'fpn_reduce_layer': 'conv',
-    'fpn_downsample_layer': 'maxpool',
     'fpn_core_block': 'smblock',
-    'fpn_expand_ratio': 0.5,
+    'fpn_reduce_layer': 'conv',
+    'fpn_downsample_layer': 'dsblock',
     'fpn_act': 'silu',
     'fpn_norm': 'BN',
     'fpn_depthwise': True,

+ 15 - 12
models/detectors/lodet/lodet_backbone.py

@@ -1,9 +1,9 @@
 import torch
 import torch.nn as nn
 try:
-    from .lodet_basic import Conv, SMBlock
+    from .lodet_basic import Conv, SMBlock, DSBlock
 except:
-    from lodet_basic import Conv, SMBlock
+    from lodet_basic import Conv, SMBlock, DSBlock
 
 
 
@@ -16,30 +16,33 @@ model_urls = {
 class ScaleModulationNet(nn.Module):
     def __init__(self, act_type='silu', norm_type='BN', depthwise=False):
         super(ScaleModulationNet, self).__init__()
-        self.feat_dims = [128, 256, 256]
+        self.feat_dims = [64, 128, 256]
         
         # P1/2
-        self.layer_1 = Conv(3, 32, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type)
+        self.layer_1 = nn.Sequential(
+            Conv(3, 16, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
+            Conv(16, 16, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+        )
 
         # P2/4
         self.layer_2 = nn.Sequential(   
-            nn.MaxPool2d((2, 2), stride=2),             
-            SMBlock(32, 64, 0.5, act_type, norm_type, depthwise)
+            DSBlock(16, 16, act_type, norm_type, depthwise),             
+            SMBlock(16, 32, act_type, norm_type, depthwise)
         )
         # P3/8
         self.layer_3 = nn.Sequential(
-            nn.MaxPool2d((2, 2), stride=2),             
-            SMBlock(64, 128, 0.5, act_type, norm_type, depthwise)
+            DSBlock(32, 32, act_type, norm_type, depthwise),             
+            SMBlock(32, 64, act_type, norm_type, depthwise)
         )
         # P4/16
         self.layer_4 = nn.Sequential(
-            nn.MaxPool2d((2, 2), stride=2),             
-            SMBlock(128, 256, 0.5, act_type, norm_type, depthwise)
+            DSBlock(64, 64, act_type, norm_type, depthwise),             
+            SMBlock(64, 128, act_type, norm_type, depthwise)
         )
         # P5/32
         self.layer_5 = nn.Sequential(
-            nn.MaxPool2d((2, 2), stride=2),             
-            SMBlock(256, 256, 0.5, act_type, norm_type, depthwise)
+            DSBlock(128, 128, act_type, norm_type, depthwise),             
+            SMBlock(128, 256, act_type, norm_type, depthwise)
         )
 
 

+ 33 - 7
models/detectors/lodet/lodet_basic.py

@@ -85,20 +85,28 @@ class Conv(nn.Module):
 # ---------------------------- Core Modules ----------------------------
 ## Scale Modulation Block
 class SMBlock(nn.Module):
-    def __init__(self, in_dim, out_dim, expand_ratio=0.5, act_type='silu', norm_type='BN', depthwise=False):
+    def __init__(self, in_dim, out_dim, act_type='silu', norm_type='BN', depthwise=False):
         super(SMBlock, self).__init__()
         # -------------- Basic parameters --------------
         self.in_dim = in_dim
         self.out_dim = out_dim
-        self.expand_ratio = expand_ratio
-        self.inter_dim = round(in_dim * expand_ratio)
+        self.inter_dim = in_dim // 2
         # -------------- Network parameters --------------
         self.cv1 = Conv(self.inter_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv2 = Conv(self.inter_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         ## Scale Modulation
-        self.sm1 = Conv(self.inter_dim, self.inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.sm2 = Conv(self.inter_dim, self.inter_dim, k=5, p=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.sm3 = Conv(self.inter_dim, self.inter_dim, k=7, p=3, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+        self.sm1 = nn.Sequential(
+            Conv(self.inter_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type),
+            Conv(self.inter_dim, self.inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            )
+        self.sm2 = nn.Sequential(
+            Conv(self.inter_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type),
+            Conv(self.inter_dim, self.inter_dim, k=5, p=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            )
+        self.sm3 = nn.Sequential(
+            Conv(self.inter_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type),
+            Conv(self.inter_dim, self.inter_dim, k=7, p=3, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+            )
         ## Output proj
         self.out_proj = Conv(self.inter_dim*4, self.out_dim, k=1, act_type=act_type, norm_type=norm_type)
 
@@ -133,6 +141,23 @@ class SMBlock(nn.Module):
 
         return out
 
+## DownSample Block: splits the input in two along dim 1, max-pools one chunk, runs a k=3/s=2 Conv on the other, then fuses both with a k=1 Conv
+class DSBlock(nn.Module):
+    def __init__(self, in_dim, out_dim, act_type='silu', norm_type='BN', depthwise=False):
+        super().__init__()
+        self.maxpool = nn.MaxPool2d((2, 2), 2)  # 2x2 window, stride 2
+        self.conv = Conv(in_dim//2, in_dim//2, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)  # applied to the second chunk in forward(), hence in_dim//2 channels; presumably s=2 is the stride (Conv is defined outside this hunk)
+        self.out_proj = Conv(in_dim, out_dim, k=1, act_type=act_type, norm_type=norm_type)  # k=1 Conv mapping the re-concatenated in_dim channels to out_dim
+
+    def forward(self, x):
+        x1, x2 = torch.chunk(x, 2, dim=1)  # two chunks along dim 1 (assumes dim 1 is the channel axis, i.e. NCHW input - TODO confirm)
+        x1 = self.maxpool(x1)  # first chunk: max-pool branch
+        x2 = self.conv(x2)  # second chunk: Conv branch
+        out = torch.cat([x1, x2], dim=1)  # rejoin both branches along dim 1
+        out = self.out_proj(out)  # project the fused tensor to out_dim
+
+        return out
+
 
 # ---------------------------- FPN Modules ----------------------------
 ## build fpn's core block
@@ -140,7 +165,6 @@ def build_fpn_block(cfg, in_dim, out_dim):
     if cfg['fpn_core_block'] == 'smblock':
         layer = SMBlock(in_dim=in_dim,
                         out_dim=out_dim,
-                        expand_ratio=cfg['fpn_expand_ratio'],
                         act_type=cfg['fpn_act'],
                         norm_type=cfg['fpn_norm'],
                         depthwise=cfg['fpn_depthwise']
@@ -162,5 +186,7 @@ def build_downsample_layer(cfg, in_dim, out_dim):
     elif cfg['fpn_downsample_layer'] == 'maxpool':
         assert in_dim == out_dim
         layer = nn.MaxPool2d((2, 2), stride=2)
+    elif cfg['fpn_downsample_layer'] == 'dsblock':
+        layer = DSBlock(in_dim, out_dim, act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm'], depthwise=cfg['fpn_depthwise'])
         
     return layer

+ 19 - 22
models/detectors/lodet/lodet_pafpn.py

@@ -7,43 +7,40 @@ from .lodet_basic import (Conv, build_reduce_layer, build_downsample_layer, buil
 
 # YOLO-Style PaFPN
 class LodetPaFPN(nn.Module):
-    def __init__(self, cfg, in_dims=[128, 256, 256], out_dim=None):
+    def __init__(self, cfg, in_dims=[64, 128, 256], out_dim=None):
         super(LodetPaFPN, self).__init__()
         # --------------------------- Basic Parameters ---------------------------
-        self.in_dims = in_dims
-        c3, c4, c5 = in_dims
+        self.fpn_dims = in_dims
         
         # --------------------------- Top-down FPN---------------------------
         ## P5 -> P4
-        self.reduce_layer_1 = build_reduce_layer(cfg, c5, 128)
-        self.reduce_layer_2 = build_reduce_layer(cfg, c4, 128)
-        self.top_down_layer_1 = build_fpn_block(cfg, 128 + 128, 128)
+        self.reduce_layer_1 = build_reduce_layer(cfg, self.fpn_dims[2], self.fpn_dims[2]//2)
+        self.top_down_layer_1 = build_fpn_block(cfg, self.fpn_dims[1] + self.fpn_dims[2]//2, self.fpn_dims[1])
 
         ## P4 -> P3
-        self.reduce_layer_3 = build_reduce_layer(cfg, 128, 64)
-        self.reduce_layer_4 = build_reduce_layer(cfg, c3, 64)
-        self.top_down_layer_2 = build_fpn_block(cfg, 64 + 64, 64)
+        self.reduce_layer_2 = build_reduce_layer(cfg, self.fpn_dims[1], self.fpn_dims[1]//2)
+        self.top_down_layer_2 = build_fpn_block(cfg, self.fpn_dims[0] + self.fpn_dims[1]//2, self.fpn_dims[0])
 
         # --------------------------- Bottom-up FPN ---------------------------
         ## P3 -> P4
-        self.downsample_layer_1 = build_downsample_layer(cfg, 64, 64)
-        self.bottom_up_layer_1 = build_fpn_block(cfg, 64 + 64, 128)
+        self.downsample_layer_1 = build_downsample_layer(cfg, self.fpn_dims[0], self.fpn_dims[0])
+        self.bottom_up_layer_1 = build_fpn_block(cfg, self.fpn_dims[0] + self.fpn_dims[1]//2, self.fpn_dims[1])
 
         ## P4 -> P5
-        self.downsample_layer_2 = build_downsample_layer(cfg, 128, 128)
-        self.bottom_up_layer_2 = build_fpn_block(cfg, 128 + 128, 256)
+        self.downsample_layer_2 = build_downsample_layer(cfg, self.fpn_dims[1], self.fpn_dims[1])
+        self.bottom_up_layer_2 = build_fpn_block(cfg, self.fpn_dims[1] + self.fpn_dims[2]//2, self.fpn_dims[2])
                 
         # --------------------------- Output proj ---------------------------
         if out_dim is not None:
             self.out_layers = nn.ModuleList([
                 Conv(in_dim, out_dim, k=1,
                      act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm'])
-                     for in_dim in [64, 128, 256]
+                     for in_dim in self.fpn_dims
                      ])
             self.out_dim = [out_dim] * 3
         else:
             self.out_layers = None
-            self.out_dim = self.in_dims
+            self.out_dim = self.fpn_dims
 
 
     def forward(self, features):
@@ -52,21 +49,21 @@ class LodetPaFPN(nn.Module):
         # Top down
         ## P5 -> P4
         c6 = self.reduce_layer_1(c5)
-        c7 = self.reduce_layer_2(c4)
-        c8 = torch.cat([F.interpolate(c6, scale_factor=2.0), c7], dim=1)
+        c7 = F.interpolate(c6, scale_factor=2.0)
+        c8 = torch.cat([c7, c4], dim=1)
         c9 = self.top_down_layer_1(c8)
         ## P4 -> P3
-        c10 = self.reduce_layer_3(c9)
-        c11 = self.reduce_layer_4(c3)
-        c12 = torch.cat([F.interpolate(c10, scale_factor=2.0), c11], dim=1)
+        c10 = self.reduce_layer_2(c9)
+        c11 = F.interpolate(c10, scale_factor=2.0)
+        c12 = torch.cat([c11, c3], dim=1)
         c13 = self.top_down_layer_2(c12)
 
         # Bottom up
-        # p3 -> P4
+        ## P3 -> P4
         c14 = self.downsample_layer_1(c13)
         c15 = torch.cat([c14, c10], dim=1)
         c16 = self.bottom_up_layer_1(c15)
-        # P4 -> P5
+        ## P4 -> P5
         c17 = self.downsample_layer_2(c16)
         c18 = torch.cat([c17, c6], dim=1)
         c19 = self.bottom_up_layer_2(c18)