yjh0410 2 éve
szülő
commit
2bce679faf

+ 1 - 1
README.md

@@ -123,7 +123,7 @@ python train.py --cuda -d coco --root path/to/COCO -m yolov1 -bs 16 --max_epoch
 
 | Model   |   Backbone    | Scale | Epoch | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
 |---------|---------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
-| YOLOX-N | CSPDarkNet-N  |  640  |  300  |         31.1           |       49.5        |   7.5             |   2.3              | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolox_n_coco.pth) |
+| YOLOX-N | CSPDarkNet-N  |  640  |  300  |         30.4           |       48.9        |   7.5             |   2.3              | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolox_n_coco.pth) |
 | YOLOX-S | CSPDarkNet-S  |  640  |  300  |         39.0           |       58.8        |   26.8            |   8.9              | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolox_s_coco.pth) |
 | YOLOX-M | CSPDarkNet-M  |  640  |  300  |         44.6           |       63.8        |   74.3            |   25.4             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolox_m_coco.pth) |
 | YOLOX-L | CSPDarkNet-L  |  640  |  300  |         46.9           |       65.9        |   155.4           |   54.2             | [ckpt](https://github.com/yjh0410/PyTorch_YOLO_Tutorial/releases/download/yolo_tutorial_ckpt/yolox_l_coco.pth) |

+ 5 - 0
config/model_config/lowdet_config.py

@@ -9,6 +9,7 @@ lowdet_cfg = {
     'bk_act': 'silu',
     'bk_norm': 'BN',
     'bk_dpw': True,
+    'bk_num_heads': 4,
     'stride': [8, 16, 32],  # P3, P4, P5
     'max_stride': 32,
     ## Neck: SPP
@@ -23,6 +24,9 @@ lowdet_cfg = {
     'fpn_core_block': 'smblock',
     'fpn_reduce_layer': 'conv',
     'fpn_downsample_layer': 'conv',
+    'fpn_nblocks': 1,
+    'fpn_num_heads': 4,
+    'fpn_shortcut': False,
     'fpn_act': 'silu',
     'fpn_norm': 'BN',
     'fpn_depthwise': True,
@@ -30,6 +34,7 @@ lowdet_cfg = {
     'head': 'decoupled_head',
     'head_act': 'silu',
     'head_norm': 'BN',
+    'head_dim': 96,
     'num_cls_head': 2,
     'num_reg_head': 2,
     'head_depthwise': True,

+ 1 - 1
models/detectors/lowdet/lowdet.py

@@ -36,7 +36,7 @@ class LOWDet(nn.Module):
         self.nms_thresh = nms_thresh
         self.topk = topk
         self.deploy = deploy
-        self.head_dim = 64
+        self.head_dim = cfg['head_dim']
         
         # ---------------------- Network Parameters ----------------------
         ## ----------- Backbone -----------

+ 13 - 12
models/detectors/lowdet/lowdet_backbone.py

@@ -14,35 +14,35 @@ model_urls = {
 
 # ---------------------------- Backbones ----------------------------
 class ScaleModulationNet(nn.Module):
-    def __init__(self, act_type='silu', norm_type='BN', depthwise=False):
+    def __init__(self, num_heads=4, act_type='silu', norm_type='BN', depthwise=False):
         super(ScaleModulationNet, self).__init__()
-        self.feat_dims = [64, 128, 256]
+        self.feat_dims = [96, 192, 384]
         
         # P1/2
         self.layer_1 = nn.Sequential(
-            Conv(3, 16, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
-            Conv(16, 16, k=3, p=1, s=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
+            Conv(3, 24, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type),
+            Conv(24, 24, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise),
         )
 
         # P2/4
         self.layer_2 = nn.Sequential(   
-            DSBlock(16, act_type, norm_type, depthwise),             
-            SMBlock(32, 32, act_type, norm_type, depthwise)
+            DSBlock(24, act_type, norm_type, depthwise),             
+            SMBlock(48, 48, nblocks=1, num_heads=num_heads, shortcut=True, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
         # P3/8
         self.layer_3 = nn.Sequential(
-            DSBlock(32, act_type, norm_type, depthwise),             
-            SMBlock(64, 64, act_type, norm_type, depthwise)
+            DSBlock(48, act_type, norm_type, depthwise),             
+            SMBlock(96, 96, nblocks=3, num_heads=num_heads, shortcut=True, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
         # P4/16
         self.layer_4 = nn.Sequential(
-            DSBlock(64, act_type, norm_type, depthwise),             
-            SMBlock(128, 128, act_type, norm_type, depthwise)
+            DSBlock(96, act_type, norm_type, depthwise),             
+            SMBlock(192, 192, nblocks=3, num_heads=num_heads, shortcut=True, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
         # P5/32
         self.layer_5 = nn.Sequential(
-            DSBlock(128, act_type, norm_type, depthwise),             
-            SMBlock(256, 256, act_type, norm_type, depthwise)
+            DSBlock(192, act_type, norm_type, depthwise),             
+            SMBlock(384, 384, nblocks=2, num_heads=num_heads, shortcut=True, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
         )
 
 
@@ -93,6 +93,7 @@ def load_weight(model, model_name):
 def build_backbone(cfg, pretrained=False): 
     # model
     backbone = ScaleModulationNet(
+        num_heads=cfg['bk_num_heads'],
         act_type=cfg['bk_act'],
         norm_type=cfg['bk_norm'],
         depthwise=cfg['bk_dpw']

+ 44 - 45
models/detectors/lowdet/lowdet_basic.py

@@ -83,53 +83,53 @@ class Conv(nn.Module):
 
 
 # ---------------------------- Core Modules ----------------------------
+## MultiHeadMixedConv
+class MultiHeadMixedConv(nn.Module):
+    def __init__(self, in_dim, out_dim, num_heads=4, shortcut=False, act_type='silu', norm_type='BN', depthwise=False):
+        super().__init__()
+        # -------------- Basic parameters --------------
+        self.in_dim = in_dim
+        self.out_dim = out_dim
+        self.num_heads = num_heads
+        self.head_dim = in_dim // num_heads
+        self.shortcut = shortcut
+        # -------------- Network parameters --------------
+        ## Scale Modulation
+        self.mixed_convs = nn.ModuleList([
+            Conv(self.head_dim, self.head_dim, k=2*i+1, p=i, act_type=None, norm_type=None, depthwise=depthwise)
+            for i in range(num_heads)])
+        ## Aggregation proj
+        self.out_proj = Conv(self.head_dim*num_heads, out_dim, k=1, act_type=act_type, norm_type=norm_type)
+
+    def forward(self, x):
+        xs = torch.chunk(x, self.num_heads, dim=1)
+        ys = [mixed_conv(x_h) for x_h, mixed_conv in zip(xs, self.mixed_convs)]
+        ys = self.out_proj(torch.cat(ys, dim=1))
+
+        return x + ys if self.shortcut else ys
+    
+
 ## Scale Modulation Block
 class SMBlock(nn.Module):
-    def __init__(self, in_dim, out_dim, act_type='silu', norm_type='BN', depthwise=False):
-        super(SMBlock, self).__init__()
+    def __init__(self, in_dim, out_dim, nblocks=1, num_heads=4, shortcut=False, act_type='silu', norm_type='BN', depthwise=False):
+        super().__init__()
         # -------------- Basic parameters --------------
         self.in_dim = in_dim
+        self.out_dim = out_dim
+        self.nblocks = nblocks
+        self.num_heads = num_heads
+        self.shortcut = shortcut
         self.inter_dim = in_dim // 2
         # -------------- Network parameters --------------
         self.cv1 = Conv(self.inter_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         self.cv2 = Conv(self.inter_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
         ## Scale Modulation
-        self.sm1 = nn.Sequential(
-            Conv(self.inter_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type),
-            Conv(self.inter_dim, self.inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-            )
-        self.sm2 = nn.Sequential(
-            Conv(self.inter_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type),
-            Conv(self.inter_dim, self.inter_dim, k=5, p=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-            )
-        self.sm3 = nn.Sequential(
-            Conv(self.inter_dim, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type),
-            Conv(self.inter_dim, self.inter_dim, k=7, p=3, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-            )
-        ## Aggregation proj
-        self.sm_aggregation = Conv(self.inter_dim*3, self.inter_dim, k=1, act_type=act_type, norm_type=norm_type)
-
-        # Output proj
-        self.out_proj = None
-        if in_dim != out_dim:
-            self.out_proj = Conv(self.inter_dim*2, out_dim, k=1, act_type=act_type, norm_type=norm_type)
-
-
-    def channel_shuffle(self, x, groups):
-        # type: (torch.Tensor, int) -> torch.Tensor
-        batchsize, num_channels, height, width = x.data.size()
-        per_group_dim = num_channels // groups
+        self.smblocks = nn.Sequential(*[
+            MultiHeadMixedConv(self.inter_dim, self.inter_dim, self.num_heads, self.shortcut, act_type, norm_type, depthwise)
+            for _ in range(nblocks)])
+        ## Output proj
+        self.out_proj = Conv(self.inter_dim*2, out_dim, k=1, act_type=act_type, norm_type=norm_type)
 
-        # reshape
-        x = x.view(batchsize, groups, per_group_dim, height, width)
-
-        x = torch.transpose(x, 1, 2).contiguous()
-
-        # flatten
-        x = x.view(batchsize, -1, height, width)
-
-        return x
-    
 
     def forward(self, x):
         """
@@ -142,18 +142,14 @@ class SMBlock(nn.Module):
         # branch-1
         x1 = self.cv1(x1)
         # branch-2
-        x2 = self.cv2(x2)
-        x2 = torch.cat([self.sm1(x2), self.sm2(x2), self.sm3(x2)], dim=1)
-        x2 = self.sm_aggregation(x2)
-        # channel shuffle
+        x2 = self.smblocks(x2)
+        # output
         out = torch.cat([x1, x2], dim=1)
-        out = self.channel_shuffle(out, groups=2)
-
-        if self.out_proj:
-            out = self.out_proj(out)
+        out = self.out_proj(out)
 
         return out
 
+
 ## DownSample Block
 class DSBlock(nn.Module):
     def __init__(self, in_dim, act_type='silu', norm_type='BN', depthwise=False):
@@ -208,6 +204,9 @@ def build_fpn_block(cfg, in_dim, out_dim):
     if cfg['fpn_core_block'] == 'smblock':
         layer = SMBlock(in_dim=in_dim,
                         out_dim=out_dim,
+                        nblocks=cfg['fpn_nblocks'],
+                        num_heads=cfg['fpn_num_heads'],
+                        shortcut=cfg['fpn_shortcut'],
                         act_type=cfg['fpn_act'],
                         norm_type=cfg['fpn_norm'],
                         depthwise=cfg['fpn_depthwise']