فهرست منبع

modify RTCDet

yjh0410 1 سال پیش
والد
کامیت
bce60c76ab
4 فایل تغییر یافته به همراه 133 افزوده شده و 73 حذف شده
  1. 30 23
      yolo/config/rtcdet_config.py
  2. 10 9
      yolo/models/rtcdet/rtcdet_backbone.py
  3. 56 0
      yolo/models/rtcdet/rtcdet_basic.py
  4. 37 41
      yolo/models/rtcdet/rtcdet_pafpn.py

+ 30 - 23
yolo/config/rtcdet_config.py

@@ -21,13 +21,13 @@ def build_rtcdet_config(args):
 class RTCDetBaseConfig(object):
     def __init__(self) -> None:
         # ---------------- Model config ----------------
-        self.width    = 1.0
-        self.depth    = 1.0
-        self.ratio    = 1.0
-        self.reg_max  = 16
+        self.channel_width = 1.0
+        self.last_stage_ratio = 1.0
+        self.num_blocks = [3, 6, 6, 3]
+        self.num_levels = 3
         self.out_stride = [8, 16, 32]
         self.max_stride = 32
-        self.num_levels = 3
+        self.reg_max    = 16
         self.scale      = "b"
         ## Backbone
         self.bk_act   = 'silu'
@@ -41,6 +41,7 @@ class RTCDetBaseConfig(object):
         self.neck_expand_ratio = 0.5
         self.spp_pooling_size  = 5
         ## FPN
+        self.fpn_num_blocks = 3
         self.fpn_act  = 'silu'
         self.fpn_norm = 'BN'
         self.fpn_depthwise = False
@@ -129,10 +130,11 @@ class RTCDetNConfig(RTCDetBaseConfig):
     def __init__(self) -> None:
         super().__init__()
         # ---------------- Model config ----------------
-        self.width = 0.25
-        self.depth = 0.34
-        self.ratio = 2.0
+        self.channel_width = 0.25
+        self.last_stage_ratio = 2.0
+        self.num_blocks = [1, 2, 2, 1]
         self.scale = "n"
+        self.fpn_num_blocks = 1
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -144,10 +146,11 @@ class RTCDetTConfig(RTCDetBaseConfig):
     def __init__(self) -> None:
         super().__init__()
         # ---------------- Model config ----------------
-        self.width = 0.375
-        self.depth = 0.34
-        self.ratio = 2.0
+        self.channel_width = 0.375
+        self.last_stage_ratio = 2.0
+        self.num_blocks = [1, 2, 2, 1]
         self.scale = "t"
+        self.fpn_num_blocks = 1
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -159,10 +162,11 @@ class RTCDetSConfig(RTCDetBaseConfig):
     def __init__(self) -> None:
         super().__init__()
         # ---------------- Model config ----------------
-        self.width = 0.50
-        self.depth = 0.34
-        self.ratio = 2.0
+        self.channel_width = 0.50
+        self.num_blocks = [1, 2, 2, 1]
+        self.last_stage_ratio = 2.0
         self.scale = "s"
+        self.fpn_num_blocks = 1
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -174,10 +178,11 @@ class RTCDetMConfig(RTCDetBaseConfig):
     def __init__(self) -> None:
         super().__init__()
         # ---------------- Model config ----------------
-        self.width = 0.75
-        self.depth = 0.67
-        self.ratio = 1.5
+        self.channel_width = 0.75
+        self.last_stage_ratio = 1.5
+        self.num_blocks = [2, 4, 4, 2]
         self.scale = "m"
+        self.fpn_num_blocks = 2
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -189,10 +194,11 @@ class RTCDetLConfig(RTCDetBaseConfig):
     def __init__(self) -> None:
         super().__init__()
         # ---------------- Model config ----------------
-        self.width = 1.0
-        self.depth = 1.0
-        self.ratio = 1.0
+        self.channel_width = 1.0
+        self.last_stage_ratio = 1.0
+        self.num_blocks = [3, 6, 6, 3]
         self.scale = "l"
+        self.fpn_num_blocks = 3
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -204,10 +210,11 @@ class RTCDetXConfig(RTCDetBaseConfig):
     def __init__(self) -> None:
         super().__init__()
         # ---------------- Model config ----------------
-        self.width = 1.25
-        self.depth = 1.0
-        self.ratio = 1.0
+        self.channel_width = 1.25
+        self.last_stage_ratio = 1.0
+        self.num_blocks = [3, 6, 6, 3]
         self.scale = "x"
+        self.fpn_num_blocks = 4
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0

+ 10 - 9
yolo/models/rtcdet/rtcdet_backbone.py

@@ -13,11 +13,12 @@ class RTCBackbone(nn.Module):
         super(RTCBackbone, self).__init__()
         # ------------------ Basic setting ------------------
         self.model_scale = cfg.scale
-        self.feat_dims = [round(64  * cfg.width),
-                          round(128 * cfg.width),
-                          round(256 * cfg.width),
-                          round(512 * cfg.width),
-                          round(512 * cfg.width * cfg.ratio)]
+        self.num_blocks  = cfg.num_blocks
+        self.feat_dims = [round(64  * cfg.channel_width),
+                          round(128 * cfg.channel_width),
+                          round(256 * cfg.channel_width),
+                          round(512 * cfg.channel_width),
+                          round(512 * cfg.channel_width * cfg.last_stage_ratio)]
         
         # ------------------ Network setting ------------------
         ## P1/2
@@ -31,7 +32,7 @@ class RTCBackbone(nn.Module):
                       act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
             ELANLayer(in_dim     = self.feat_dims[1],
                       out_dim    = self.feat_dims[1],
-                      num_blocks = round(3*cfg.depth),
+                      num_blocks = self.num_blocks[0],
                       expansion  = 0.5,
                       shortcut   = True,
                       act_type   = cfg.bk_act,
@@ -44,7 +45,7 @@ class RTCBackbone(nn.Module):
                   act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
             ELANLayer(in_dim     = self.feat_dims[2],
                       out_dim    = self.feat_dims[2],
-                      num_blocks = round(6*cfg.depth),
+                      num_blocks = self.num_blocks[1],
                       expansion  = 0.5,
                       shortcut   = True,
                       act_type   = cfg.bk_act,
@@ -57,7 +58,7 @@ class RTCBackbone(nn.Module):
                   act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
             ELANLayer(in_dim     = self.feat_dims[3],
                       out_dim    = self.feat_dims[3],
-                      num_blocks = round(6*cfg.depth),
+                      num_blocks = self.num_blocks[2],
                       expansion  = 0.5,
                       shortcut   = True,
                       act_type   = cfg.bk_act,
@@ -70,7 +71,7 @@ class RTCBackbone(nn.Module):
                   act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
             ELANLayer(in_dim     = self.feat_dims[4],
                       out_dim    = self.feat_dims[4],
-                      num_blocks = round(3*cfg.depth),
+                      num_blocks = self.num_blocks[3],
                       expansion  = 0.5,
                       shortcut   = True,
                       act_type   = cfg.bk_act,

+ 56 - 0
yolo/models/rtcdet/rtcdet_basic.py

@@ -166,3 +166,59 @@ class ELANLayer(nn.Module):
         out = self.output_proj(torch.cat(out, dim=1))
 
         return out
+
+class ELANLayerFPN(nn.Module):  # FPN block: two kernel_size=1 branches plus four chained kernel_size=3 branches, concatenated and projected to out_dim
+    def __init__(self,
+                 in_dim,  # channel count of the tensor fed to branch_1 and branch_2
+                 out_dim,  # channel count produced by output_proj
+                 num_blocks :int   = 1,  # number of kernel_size=3 BasicConv layers stacked inside each of branch_3..branch_6
+                 expansion  :float = 0.5,  # applied twice: out_dim -> inter_dim_1 -> inter_dim_2
+                 act_type   :str   = 'silu',  # forwarded to every BasicConv in this block
+                 norm_type  :str   = 'BN',  # forwarded to every BasicConv in this block
+                 depthwise  :bool  = False,  # forwarded only to the kernel_size=3 BasicConv layers (branch_3..branch_6)
+                 ) -> None:
+        super(ELANLayerFPN, self).__init__()
+        inter_dim_1 = round(out_dim * expansion)  # width of branch_1 / branch_2 outputs
+        inter_dim_2 = round(inter_dim_1* expansion)  # width of branch_3..branch_6 outputs
+        # Branch-1: kernel_size=1 BasicConv, in_dim -> inter_dim_1 (its output only feeds the final concat)
+        self.branch_1 = BasicConv(in_dim, inter_dim_1, kernel_size=1, act_type=act_type, norm_type=norm_type)
+        # Branch-2: kernel_size=1 BasicConv, in_dim -> inter_dim_1 (its output also feeds branch_3)
+        self.branch_2 = BasicConv(in_dim, inter_dim_1, kernel_size=1, act_type=act_type, norm_type=norm_type)
+        # Branch-3: num_blocks kernel_size=3 BasicConv layers; only the first one changes width (inter_dim_1 -> inter_dim_2)
+        branch_3 = []
+        for i in range(num_blocks):  # NOTE(review): num_blocks == 0 would leave x3..x6 at inter_dim_1 channels, mismatching output_proj's input width - presumably num_blocks >= 1 is assumed; confirm
+            if i == 0:
+                branch_3.append(BasicConv(inter_dim_1, inter_dim_2, kernel_size=3, padding=1,
+                                          act_type=act_type, norm_type=norm_type, depthwise=depthwise))
+            else:
+                branch_3.append(BasicConv(inter_dim_2, inter_dim_2, kernel_size=3, padding=1,
+                                          act_type=act_type, norm_type=norm_type, depthwise=depthwise))
+        self.branch_3 = nn.Sequential(*branch_3)
+        # Branch-4: num_blocks kernel_size=3 BasicConv layers, inter_dim_2 -> inter_dim_2
+        self.branch_4 = nn.Sequential(*[BasicConv(inter_dim_2, inter_dim_2, kernel_size=3, padding=1,
+                                                  act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+                                                     for _ in range(num_blocks)])
+        # Branch-5: same layout as Branch-4
+        self.branch_5 = nn.Sequential(*[BasicConv(inter_dim_2, inter_dim_2, kernel_size=3, padding=1,
+                                                  act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+                                                     for _ in range(num_blocks)])
+        # Branch-6: same layout as Branch-4
+        self.branch_6 = nn.Sequential(*[BasicConv(inter_dim_2, inter_dim_2, kernel_size=3, padding=1,
+                                                  act_type=act_type, norm_type=norm_type, depthwise=depthwise)
+                                                     for _ in range(num_blocks)])
+        self.output_proj = BasicConv(2*inter_dim_1 + 4*inter_dim_2, out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)  # input width = x1, x2 (inter_dim_1 each) + x3..x6 (inter_dim_2 each)
+
+    def forward(self, x):  # returns output_proj applied to the dim=1 concatenation of all six branch outputs
+        # Elan: branch_1 and branch_2 both read x; branch_3..branch_6 form a chain starting from x2
+        x1 = self.branch_1(x)
+        x2 = self.branch_2(x)
+        x3 = self.branch_3(x2)
+        x4 = self.branch_4(x3)
+        x5 = self.branch_5(x4)
+        x6 = self.branch_6(x5)
+
+        # Output proj: concatenate every intermediate result along dim=1, then project to out_dim
+        out = list([x1, x2, x3, x4, x5, x6])
+        out = self.output_proj(torch.cat(out, dim=1))
+
+        return out

+ 37 - 41
yolo/models/rtcdet/rtcdet_pafpn.py

@@ -3,7 +3,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 from typing import List
 
-from .rtcdet_basic import ELANLayer, MDown
+from .rtcdet_basic import ELANLayerFPN, MDown, BasicConv
 
 
 # Modified YOLOv8's PaFPN
@@ -14,57 +14,53 @@ class RTCPaFPN(nn.Module):
                  ) -> None:
         super(RTCPaFPN, self).__init__()
         print('==============================')
-        print('FPN: {}'.format("Yolo PaFPN"))
+        print('FPN: {}'.format("RTC-PaFPN"))
         # --------------------------- Basic Parameters ---------------------------
         self.in_dims = in_dims[::-1]
-        self.out_dims = [round(256*cfg.width), round(512*cfg.width), round(512*cfg.width*cfg.ratio)]
+        self.out_dims = [round(256*cfg.channel_width), round(512*cfg.channel_width), round(1024*cfg.channel_width)]
 
         # ----------------------------- Yolov8's Top-down FPN -----------------------------
         ## P5 -> P4
-        self.top_down_layer_1 = ELANLayer(in_dim     = self.in_dims[0] + self.in_dims[1],
-                                          out_dim    = round(512*cfg.width),
-                                          expansion  = 0.5,
-                                          num_blocks = round(3 * cfg.depth),
-                                          shortcut   = False,
-                                          act_type   = cfg.fpn_act,
-                                          norm_type  = cfg.fpn_norm,
-                                          depthwise  = cfg.fpn_depthwise,
-                                          )
+        self.top_down_layer_1 = ELANLayerFPN(in_dim     = self.in_dims[0] + self.in_dims[1],
+                                             out_dim    = round(512*cfg.channel_width),
+                                             expansion  = 0.5,
+                                             num_blocks = cfg.fpn_num_blocks,
+                                             act_type   = cfg.fpn_act,
+                                             norm_type  = cfg.fpn_norm,
+                                             depthwise  = cfg.fpn_depthwise,
+                                             )
         ## P4 -> P3
-        self.top_down_layer_2 = ELANLayer(in_dim     = self.in_dims[2] + round(512*cfg.width),
-                                          out_dim    = round(256*cfg.width),
-                                          expansion  = 0.5,
-                                          num_blocks = round(3 * cfg.depth),
-                                          shortcut   = False,
-                                          act_type   = cfg.fpn_act,
-                                          norm_type  = cfg.fpn_norm,
-                                          depthwise  = cfg.fpn_depthwise,
-                                          )
+        self.top_down_layer_2 = ELANLayerFPN(in_dim     = self.in_dims[2] + round(512*cfg.channel_width),
+                                             out_dim    = round(256*cfg.channel_width),
+                                             expansion  = 0.5,
+                                             num_blocks = cfg.fpn_num_blocks,
+                                             act_type   = cfg.fpn_act,
+                                             norm_type  = cfg.fpn_norm,
+                                             depthwise  = cfg.fpn_depthwise,
+                                             )
         # ----------------------------- Yolov8's Bottom-up PAN -----------------------------
         ## P3 -> P4
-        self.dowmsample_layer_1 = MDown(round(256*cfg.width), round(256*cfg.width),
+        self.dowmsample_layer_1 = MDown(round(256*cfg.channel_width), round(256*cfg.channel_width),
                                         act_type=cfg.fpn_act, norm_type=cfg.fpn_norm, depthwise=cfg.fpn_depthwise)
-        self.bottom_up_layer_1 = ELANLayer(in_dim     = round(256*cfg.width) + round(512*cfg.width),
-                                           out_dim    = round(512*cfg.width),
-                                           expansion  = 0.5,
-                                           num_blocks = round(3 * cfg.depth),
-                                           shortcut   = False,
-                                           act_type   = cfg.fpn_act,
-                                           norm_type  = cfg.fpn_norm,
-                                           depthwise  = cfg.fpn_depthwise,
-                                           )
+        self.bottom_up_layer_1  = ELANLayerFPN(in_dim     = round(256*cfg.channel_width) + round(512*cfg.channel_width),
+                                               out_dim    = round(512*cfg.channel_width),
+                                               expansion  = 0.5,
+                                               num_blocks = cfg.fpn_num_blocks,
+                                               act_type   = cfg.fpn_act,
+                                               norm_type  = cfg.fpn_norm,
+                                               depthwise  = cfg.fpn_depthwise,
+                                               )
         ## P4 -> P5
-        self.dowmsample_layer_2 = MDown(round(512*cfg.width), round(512*cfg.width),
+        self.dowmsample_layer_2 = MDown(round(512*cfg.channel_width), round(512*cfg.channel_width),
                                         act_type=cfg.fpn_act, norm_type=cfg.fpn_norm, depthwise=cfg.fpn_depthwise)
-        self.bottom_up_layer_2 = ELANLayer(in_dim     = round(512*cfg.width) + self.in_dims[0],
-                                           out_dim    = round(512*cfg.width*cfg.ratio),
-                                           expansion  = 0.5,
-                                           num_blocks = round(3 * cfg.depth),
-                                           shortcut   = False,
-                                           act_type   = cfg.fpn_act,
-                                           norm_type  = cfg.fpn_norm,
-                                           depthwise  = cfg.fpn_depthwise,
-                                           )
+        self.bottom_up_layer_2  = ELANLayerFPN(in_dim     = round(512*cfg.channel_width) + self.in_dims[0],
+                                               out_dim    = round(1024*cfg.channel_width),
+                                               expansion  = 0.5,
+                                               num_blocks = cfg.fpn_num_blocks,
+                                               act_type   = cfg.fpn_act,
+                                               norm_type  = cfg.fpn_norm,
+                                               depthwise  = cfg.fpn_depthwise,
+                                               )
 
         self.init_weights()