1 سال پیش · bce60c76ab
--- a/yolo/config/rtcdet_config.py
+++ b/yolo/config/rtcdet_config.py
@@ -21,13 +21,13 @@ def build_rtcdet_config(args):
 
				 class RTCDetBaseConfig(object):
			
 
				     def __init__(self) -> None:
			
 
				         # ---------------- Model config ----------------
			
 
				-        self.width    = 1.0
			
 
				-        self.depth    = 1.0
			
 
				-        self.ratio    = 1.0
			
 
				-        self.reg_max  = 16
			
 
				+        self.channel_width = 1.0
			
 
				+        self.last_stage_ratio = 1.0
			
 
				+        self.num_blocks = [3, 6, 6, 3]
			
 
				+        self.num_levels = 3
			
 
				         self.out_stride = [8, 16, 32]
			
 
				         self.max_stride = 32
			
 
				-        self.num_levels = 3
			
 
				+        self.reg_max    = 16
			
 
				         self.scale      = "b"
			
 
				         ## Backbone
			
 
				         self.bk_act   = 'silu'
			
@@ -41,6 +41,7 @@ class RTCDetBaseConfig(object):
 
				         self.neck_expand_ratio = 0.5
			
 
				         self.spp_pooling_size  = 5
			
 
				         ## FPN
			
 
				+        self.fpn_num_blocks = 3
			
 
				         self.fpn_act  = 'silu'
			
 
				         self.fpn_norm = 'BN'
			
 
				         self.fpn_depthwise = False
			
@@ -129,10 +130,11 @@ class RTCDetNConfig(RTCDetBaseConfig):
 
				     def __init__(self) -> None:
			
 
				         super().__init__()
			
 
				         # ---------------- Model config ----------------
			
 
				-        self.width = 0.25
			
 
				-        self.depth = 0.34
			
 
				-        self.ratio = 2.0
			
 
				+        self.channel_width = 0.25
			
 
				+        self.last_stage_ratio = 2.0
			
 
				+        self.num_blocks = [1, 2, 2, 1]
			
 
				         self.scale = "n"
			
 
				+        self.fpn_num_blocks = 1
			
 
				 
			
 
				         # ---------------- Data process config ----------------
			
 
				         self.mosaic_prob = 1.0
			
@@ -144,10 +146,11 @@ class RTCDetTConfig(RTCDetBaseConfig):
 
				     def __init__(self) -> None:
			
 
				         super().__init__()
			
 
				         # ---------------- Model config ----------------
			
 
				-        self.width = 0.375
			
 
				-        self.depth = 0.34
			
 
				-        self.ratio = 2.0
			
 
				+        self.channel_width = 0.375
			
 
				+        self.last_stage_ratio = 2.0
			
 
				+        self.num_blocks = [1, 2, 2, 1]
			
 
				         self.scale = "t"
			
 
				+        self.fpn_num_blocks = 1
			
 
				 
			
 
				         # ---------------- Data process config ----------------
			
 
				         self.mosaic_prob = 1.0
			
@@ -159,10 +162,11 @@ class RTCDetSConfig(RTCDetBaseConfig):
 
				     def __init__(self) -> None:
			
 
				         super().__init__()
			
 
				         # ---------------- Model config ----------------
			
 
				-        self.width = 0.50
			
 
				-        self.depth = 0.34
			
 
				-        self.ratio = 2.0
			
 
				+        self.channel_width = 0.50
			
 
				+        self.num_blocks = [1, 2, 2, 1]
			
 
				+        self.last_stage_ratio = 2.0
			
 
				         self.scale = "s"
			
 
				+        self.fpn_num_blocks = 1
			
 
				 
			
 
				         # ---------------- Data process config ----------------
			
 
				         self.mosaic_prob = 1.0
			
@@ -174,10 +178,11 @@ class RTCDetMConfig(RTCDetBaseConfig):
 
				     def __init__(self) -> None:
			
 
				         super().__init__()
			
 
				         # ---------------- Model config ----------------
			
 
				-        self.width = 0.75
			
 
				-        self.depth = 0.67
			
 
				-        self.ratio = 1.5
			
 
				+        self.channel_width = 0.75
			
 
				+        self.last_stage_ratio = 1.5
			
 
				+        self.num_blocks = [2, 4, 4, 2]
			
 
				         self.scale = "m"
			
 
				+        self.fpn_num_blocks = 2
			
 
				 
			
 
				         # ---------------- Data process config ----------------
			
 
				         self.mosaic_prob = 1.0
			
@@ -189,10 +194,11 @@ class RTCDetLConfig(RTCDetBaseConfig):
 
				     def __init__(self) -> None:
			
 
				         super().__init__()
			
 
				         # ---------------- Model config ----------------
			
 
				-        self.width = 1.0
			
 
				-        self.depth = 1.0
			
 
				-        self.ratio = 1.0
			
 
				+        self.channel_width = 1.0
			
 
				+        self.last_stage_ratio = 1.0
			
 
				+        self.num_blocks = [3, 6, 6, 3]
			
 
				         self.scale = "l"
			
 
				+        self.fpn_num_blocks = 3
			
 
				 
			
 
				         # ---------------- Data process config ----------------
			
 
				         self.mosaic_prob = 1.0
			
@@ -204,10 +210,11 @@ class RTCDetXConfig(RTCDetBaseConfig):
 
				     def __init__(self) -> None:
			
 
				         super().__init__()
			
 
				         # ---------------- Model config ----------------
			
 
				-        self.width = 1.25
			
 
				-        self.depth = 1.0
			
 
				-        self.ratio = 1.0
			
 
				+        self.channel_width = 1.25
			
 
				+        self.last_stage_ratio = 1.0
			
 
				+        self.num_blocks = [3, 6, 6, 3]
			
 
				         self.scale = "x"
			
 
				+        self.fpn_num_blocks = 4
			
 
				 
			
 
				         # ---------------- Data process config ----------------
			
 
				         self.mosaic_prob = 1.0
			
--- a/yolo/models/rtcdet/rtcdet_backbone.py
+++ b/yolo/models/rtcdet/rtcdet_backbone.py
@@ -13,11 +13,12 @@ class RTCBackbone(nn.Module):
 
				         super(RTCBackbone, self).__init__()
			
 
				         # ------------------ Basic setting ------------------
			
 
				         self.model_scale = cfg.scale
			
 
				-        self.feat_dims = [round(64  * cfg.width),
			
 
				-                          round(128 * cfg.width),
			
 
				-                          round(256 * cfg.width),
			
 
				-                          round(512 * cfg.width),
			
 
				-                          round(512 * cfg.width * cfg.ratio)]
			
 
				+        self.num_blocks  = cfg.num_blocks
			
 
				+        self.feat_dims = [round(64  * cfg.channel_width),
			
 
				+                          round(128 * cfg.channel_width),
			
 
				+                          round(256 * cfg.channel_width),
			
 
				+                          round(512 * cfg.channel_width),
			
 
				+                          round(512 * cfg.channel_width * cfg.last_stage_ratio)]
			
 
				         
			
 
				         # ------------------ Network setting ------------------
			
 
				         ## P1/2
			
@@ -31,7 +32,7 @@ class RTCBackbone(nn.Module):
 
				                       act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
			
 
				             ELANLayer(in_dim     = self.feat_dims[1],
			
 
				                       out_dim    = self.feat_dims[1],
			
 
				-                      num_blocks = round(3*cfg.depth),
			
 
				+                      num_blocks = self.num_blocks[0],
			
 
				                       expansion  = 0.5,
			
 
				                       shortcut   = True,
			
 
				                       act_type   = cfg.bk_act,
			
@@ -44,7 +45,7 @@ class RTCBackbone(nn.Module):
 
				                   act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
			
 
				             ELANLayer(in_dim     = self.feat_dims[2],
			
 
				                       out_dim    = self.feat_dims[2],
			
 
				-                      num_blocks = round(6*cfg.depth),
			
 
				+                      num_blocks = self.num_blocks[1],
			
 
				                       expansion  = 0.5,
			
 
				                       shortcut   = True,
			
 
				                       act_type   = cfg.bk_act,
			
@@ -57,7 +58,7 @@ class RTCBackbone(nn.Module):
 
				                   act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
			
 
				             ELANLayer(in_dim     = self.feat_dims[3],
			
 
				                       out_dim    = self.feat_dims[3],
			
 
				-                      num_blocks = round(6*cfg.depth),
			
 
				+                      num_blocks = self.num_blocks[2],
			
 
				                       expansion  = 0.5,
			
 
				                       shortcut   = True,
			
 
				                       act_type   = cfg.bk_act,
			
@@ -70,7 +71,7 @@ class RTCBackbone(nn.Module):
 
				                   act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
			
 
				             ELANLayer(in_dim     = self.feat_dims[4],
			
 
				                       out_dim    = self.feat_dims[4],
			
 
				-                      num_blocks = round(3*cfg.depth),
			
 
				+                      num_blocks = self.num_blocks[3],
			
 
				                       expansion  = 0.5,
			
 
				                       shortcut   = True,
			
 
				                       act_type   = cfg.bk_act,
			
--- a/yolo/models/rtcdet/rtcdet_basic.py
+++ b/yolo/models/rtcdet/rtcdet_basic.py
@@ -166,3 +166,59 @@ class ELANLayer(nn.Module):
 
				         out = self.output_proj(torch.cat(out, dim=1))
			
 
				 
			
 
				         return out
			
 
				+
			
 
				+class ELANLayerFPN(nn.Module):
			
 
				+    def __init__(self,
			
 
				+                 in_dim,
			
 
				+                 out_dim,
			
 
				+                 num_blocks :int   = 1,
			
 
				+                 expansion  :float = 0.5,
			
 
				+                 act_type   :str   = 'silu',
			
 
				+                 norm_type  :str   = 'BN',
			
 
				+                 depthwise  :bool  = False,
			
 
				+                 ) -> None:
			
 
				+        super(ELANLayerFPN, self).__init__()
			
 
				+        inter_dim_1 = round(out_dim * expansion)
			
 
				+        inter_dim_2 = round(inter_dim_1* expansion)
			
 
				+        # Branch-1
			
 
				+        self.branch_1 = BasicConv(in_dim, inter_dim_1, kernel_size=1, act_type=act_type, norm_type=norm_type)
			
 
				+        # Branch-2
			
 
				+        self.branch_2 = BasicConv(in_dim, inter_dim_1, kernel_size=1, act_type=act_type, norm_type=norm_type)
			
 
				+        # Branch-3
			
 
				+        branch_3 = []
			
 
				+        for i in range(num_blocks):
			
 
				+            if i == 0:
			
 
				+                branch_3.append(BasicConv(inter_dim_1, inter_dim_2, kernel_size=3, padding=1,
			
 
				+                                          act_type=act_type, norm_type=norm_type, depthwise=depthwise))
			
 
				+            else:
			
 
				+                branch_3.append(BasicConv(inter_dim_2, inter_dim_2, kernel_size=3, padding=1,
			
 
				+                                          act_type=act_type, norm_type=norm_type, depthwise=depthwise))
			
 
				+        self.branch_3 = nn.Sequential(*branch_3)
			
 
				+        # Branch-4
			
 
				+        self.branch_4 = nn.Sequential(*[BasicConv(inter_dim_2, inter_dim_2, kernel_size=3, padding=1,
			
 
				+                                                  act_type=act_type, norm_type=norm_type, depthwise=depthwise)
			
 
				+                                                     for _ in range(num_blocks)])
			
 
				+        # Branch-5
			
 
				+        self.branch_5 = nn.Sequential(*[BasicConv(inter_dim_2, inter_dim_2, kernel_size=3, padding=1,
			
 
				+                                                  act_type=act_type, norm_type=norm_type, depthwise=depthwise)
			
 
				+                                                     for _ in range(num_blocks)])
			
 
				+        # Branch-6
			
 
				+        self.branch_6 = nn.Sequential(*[BasicConv(inter_dim_2, inter_dim_2, kernel_size=3, padding=1,
			
 
				+                                                  act_type=act_type, norm_type=norm_type, depthwise=depthwise)
			
 
				+                                                     for _ in range(num_blocks)])
			
 
				+        self.output_proj = BasicConv(2*inter_dim_1 + 4*inter_dim_2, out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
			
 
				+
			
 
				+    def forward(self, x):
			
 
				+        # Elan
			
 
				+        x1 = self.branch_1(x)
			
 
				+        x2 = self.branch_2(x)
			
 
				+        x3 = self.branch_3(x2)
			
 
				+        x4 = self.branch_4(x3)
			
 
				+        x5 = self.branch_5(x4)
			
 
				+        x6 = self.branch_6(x5)
			
 
				+
			
 
				+        # Output proj
			
 
				+        out = list([x1, x2, x3, x4, x5, x6])
			
 
				+        out = self.output_proj(torch.cat(out, dim=1))
			
 
				+
			
 
				+        return out
			
--- a/yolo/models/rtcdet/rtcdet_pafpn.py
+++ b/yolo/models/rtcdet/rtcdet_pafpn.py
@@ -3,7 +3,7 @@ import torch.nn as nn
 
				 import torch.nn.functional as F
			
 
				 from typing import List
			
 
				 
			
 
				-from .rtcdet_basic import ELANLayer, MDown
			
 
				+from .rtcdet_basic import ELANLayerFPN, MDown, BasicConv
			
 
				 
			
 
				 
			
 
				 # Modified YOLOv8's PaFPN
			
@@ -14,57 +14,53 @@ class RTCPaFPN(nn.Module):
 
				                  ) -> None:
			
 
				         super(RTCPaFPN, self).__init__()
			
 
				         print('==============================')
			
 
				-        print('FPN: {}'.format("Yolo PaFPN"))
			
 
				+        print('FPN: {}'.format("RTC-PaFPN"))
			
 
				         # --------------------------- Basic Parameters ---------------------------
			
 
				         self.in_dims = in_dims[::-1]
			
 
				-        self.out_dims = [round(256*cfg.width), round(512*cfg.width), round(512*cfg.width*cfg.ratio)]
			
 
				+        self.out_dims = [round(256*cfg.channel_width), round(512*cfg.channel_width), round(1024*cfg.channel_width)]
			
 
				 
			
 
				         # ----------------------------- Yolov8's Top-down FPN -----------------------------
			
 
				         ## P5 -> P4
			
 
				-        self.top_down_layer_1 = ELANLayer(in_dim     = self.in_dims[0] + self.in_dims[1],
			
 
				-                                          out_dim    = round(512*cfg.width),
			
 
				-                                          expansion  = 0.5,
			
 
				-                                          num_blocks = round(3 * cfg.depth),
			
 
				-                                          shortcut   = False,
			
 
				-                                          act_type   = cfg.fpn_act,
			
 
				-                                          norm_type  = cfg.fpn_norm,
			
 
				-                                          depthwise  = cfg.fpn_depthwise,
			
 
				-                                          )
			
 
				+        self.top_down_layer_1 = ELANLayerFPN(in_dim     = self.in_dims[0] + self.in_dims[1],
			
 
				+                                             out_dim    = round(512*cfg.channel_width),
			
 
				+                                             expansion  = 0.5,
			
 
				+                                             num_blocks = cfg.fpn_num_blocks,
			
 
				+                                             act_type   = cfg.fpn_act,
			
 
				+                                             norm_type  = cfg.fpn_norm,
			
 
				+                                             depthwise  = cfg.fpn_depthwise,
			
 
				+                                             )
			
 
				         ## P4 -> P3
			
 
				-        self.top_down_layer_2 = ELANLayer(in_dim     = self.in_dims[2] + round(512*cfg.width),
			
 
				-                                          out_dim    = round(256*cfg.width),
			
 
				-                                          expansion  = 0.5,
			
 
				-                                          num_blocks = round(3 * cfg.depth),
			
 
				-                                          shortcut   = False,
			
 
				-                                          act_type   = cfg.fpn_act,
			
 
				-                                          norm_type  = cfg.fpn_norm,
			
 
				-                                          depthwise  = cfg.fpn_depthwise,
			
 
				-                                          )
			
 
				+        self.top_down_layer_2 = ELANLayerFPN(in_dim     = self.in_dims[2] + round(512*cfg.channel_width),
			
 
				+                                             out_dim    = round(256*cfg.channel_width),
			
 
				+                                             expansion  = 0.5,
			
 
				+                                             num_blocks = cfg.fpn_num_blocks,
			
 
				+                                             act_type   = cfg.fpn_act,
			
 
				+                                             norm_type  = cfg.fpn_norm,
			
 
				+                                             depthwise  = cfg.fpn_depthwise,
			
 
				+                                             )
			
 
				         # ----------------------------- Yolov8's Bottom-up PAN -----------------------------
			
 
				         ## P3 -> P4
			
 
				-        self.dowmsample_layer_1 = MDown(round(256*cfg.width), round(256*cfg.width),
			
 
				+        self.dowmsample_layer_1 = MDown(round(256*cfg.channel_width), round(256*cfg.channel_width),
			
 
				                                         act_type=cfg.fpn_act, norm_type=cfg.fpn_norm, depthwise=cfg.fpn_depthwise)
			
 
				-        self.bottom_up_layer_1 = ELANLayer(in_dim     = round(256*cfg.width) + round(512*cfg.width),
			
 
				-                                           out_dim    = round(512*cfg.width),
			
 
				-                                           expansion  = 0.5,
			
 
				-                                           num_blocks = round(3 * cfg.depth),
			
 
				-                                           shortcut   = False,
			
 
				-                                           act_type   = cfg.fpn_act,
			
 
				-                                           norm_type  = cfg.fpn_norm,
			
 
				-                                           depthwise  = cfg.fpn_depthwise,
			
 
				-                                           )
			
 
				+        self.bottom_up_layer_1  = ELANLayerFPN(in_dim     = round(256*cfg.channel_width) + round(512*cfg.channel_width),
			
 
				+                                               out_dim    = round(512*cfg.channel_width),
			
 
				+                                               expansion  = 0.5,
			
 
				+                                               num_blocks = cfg.fpn_num_blocks,
			
 
				+                                               act_type   = cfg.fpn_act,
			
 
				+                                               norm_type  = cfg.fpn_norm,
			
 
				+                                               depthwise  = cfg.fpn_depthwise,
			
 
				+                                               )
			
 
				         ## P4 -> P5
			
 
				-        self.dowmsample_layer_2 = MDown(round(512*cfg.width), round(512*cfg.width),
			
 
				+        self.dowmsample_layer_2 = MDown(round(512*cfg.channel_width), round(512*cfg.channel_width),
			
 
				                                         act_type=cfg.fpn_act, norm_type=cfg.fpn_norm, depthwise=cfg.fpn_depthwise)
			
 
				-        self.bottom_up_layer_2 = ELANLayer(in_dim     = round(512*cfg.width) + self.in_dims[0],
			
 
				-                                           out_dim    = round(512*cfg.width*cfg.ratio),
			
 
				-                                           expansion  = 0.5,
			
 
				-                                           num_blocks = round(3 * cfg.depth),
			
 
				-                                           shortcut   = False,
			
 
				-                                           act_type   = cfg.fpn_act,
			
 
				-                                           norm_type  = cfg.fpn_norm,
			
 
				-                                           depthwise  = cfg.fpn_depthwise,
			
 
				-                                           )
			
 
				+        self.bottom_up_layer_2  = ELANLayerFPN(in_dim     = round(512*cfg.channel_width) + self.in_dims[0],
			
 
				+                                               out_dim    = round(1024*cfg.channel_width),
			
 
				+                                               expansion  = 0.5,
			
 
				+                                               num_blocks = cfg.fpn_num_blocks,
			
 
				+                                               act_type   = cfg.fpn_act,
			
 
				+                                               norm_type  = cfg.fpn_norm,
			
 
				+                                               depthwise  = cfg.fpn_depthwise,
			
 
				+                                               )
			
 
				 
			
 
				         self.init_weights()