1 жил өмнө · fe1f77db02
--- a/config/model_config/rtdetr_config.py
+++ b/config/model_config/rtdetr_config.py
@@ -7,16 +7,17 @@ rtdetr_cfg = {
 
				         # ---------------- Model config ----------------
			
 
				         ## Image Encoder - Backbone
			
 
				         'backbone': 'resnet18',
			
 
				-        'backbone_norm': 'FrozeBN',
			
 
				+        'backbone_norm': 'BN',
			
 
				         'pretrained': True,
			
 
				         'pretrained_weight': 'imagenet1k_v1',
			
 
				-        'freeze_at': 0,
			
 
				-        'freeze_stem_only': False,
			
 
				+        'freeze_at': -1,
			
 
				+        'freeze_stem_only': True,
			
 
				         'out_stride': [8, 16, 32],
			
 
				         'max_stride': 32,
			
 
				         ## Image Encoder - FPN
			
 
				         'fpn': 'hybrid_encoder',
			
 
				         'fpn_num_blocks': 3,
			
 
				+        'fpn_expansion': 0.5,
			
 
				         'fpn_act': 'silu',
			
 
				         'fpn_norm': 'BN',
			
 
				         'fpn_depthwise': False,
			
@@ -72,6 +73,7 @@ rtdetr_cfg = {
 
				         ## Image Encoder - FPN
			
 
				         'fpn': 'hybrid_encoder',
			
 
				         'fpn_num_blocks': 3,
			
 
				+        'fpn_expansion': 1.0,
			
 
				         'fpn_act': 'silu',
			
 
				         'fpn_norm': 'BN',
			
 
				         'fpn_depthwise': False,
			
--- a/models/detectors/rtdetr/basic_modules/basic.py
+++ b/models/detectors/rtdetr/basic_modules/basic.py
@@ -243,51 +243,71 @@ class PointwiseConv(nn.Module):
 
				 
			
 
				 
			
 
				 # ----------------- CNN Modules -----------------
			
 
				-class Bottleneck(nn.Module):
			
 
				-    def __init__(self,
			
 
				-                 in_dim,
			
 
				-                 out_dim,
			
 
				-                 expand_ratio = 0.5,
			
 
				-                 kernel_sizes = [3, 3],
			
 
				-                 shortcut     = True,
			
 
				-                 act_type     = 'silu',
			
 
				-                 norm_type    = 'BN',
			
 
				-                 depthwise    = False,):
			
 
				-        super(Bottleneck, self).__init__()
			
 
				-        inter_dim = int(out_dim * expand_ratio)
			
 
				-        if depthwise:
			
 
				-            self.cv1 = nn.Sequential(
			
 
				-                DepthwiseConv(in_dim, in_dim, kernel_size=kernel_sizes[0], padding=kernel_sizes[0]//2, act_type=act_type, norm_type=norm_type),
			
 
				-                PointwiseConv(in_dim, inter_dim, act_type=act_type, norm_type=norm_type),
			
 
				-            )
			
 
				-            self.cv2 = nn.Sequential(
			
 
				-                DepthwiseConv(inter_dim, inter_dim, kernel_size=kernel_sizes[1], padding=kernel_sizes[1]//2, act_type=act_type, norm_type=norm_type),
			
 
				-                PointwiseConv(inter_dim, out_dim, act_type=act_type, norm_type=norm_type),
			
 
				-            )
			
 
				-        else:
			
 
				-            self.cv1 = BasicConv(in_dim, inter_dim,  kernel_size=kernel_sizes[0], padding=kernel_sizes[0]//2, act_type=act_type, norm_type=norm_type)
			
 
				-            self.cv2 = BasicConv(inter_dim, out_dim, kernel_size=kernel_sizes[1], padding=kernel_sizes[1]//2, act_type=act_type, norm_type=norm_type)
			
 
				-        self.shortcut = shortcut and in_dim == out_dim
			
 
				+class RepVggBlock(nn.Module):
			
 
				+    """RepVGG-style block: a 3x3 and a 1x1 conv+norm branch are summed, then activated.
			
 
				+
			
 
				+    ``convert_to_deploy`` merges both branches into a single 3x3 conv stored as
			
 
				+    ``self.conv``; ``forward`` uses that conv whenever it exists.
			
 
				+    """
			
 
				+    def __init__(self, in_dim, out_dim, act_type='relu', norm_type='BN'):
			
 
				+        super().__init__()
			
 
				+        self.in_dim = in_dim
			
 
				+        self.out_dim = out_dim
			
 
				+        self.conv1 = BasicConv(in_dim, out_dim, kernel_size=3, padding=1, act_type=None, norm_type=norm_type)
			
 
				+        # The second branch must be 1x1: get_equivalent_kernel_bias() zero-pads its kernel by
			
 
				+        # one pixel per side before adding it to the 3x3 kernel, so a 3x3 kernel here would
			
 
				+        # become 5x5 and make convert_to_deploy() fail on the shape mismatch.
			
 
				+        self.conv2 = BasicConv(in_dim, out_dim, kernel_size=1, padding=0, act_type=None, norm_type=norm_type)
			
 
				+        self.act = get_activation(act_type)
			
 
				 
			
 
				     def forward(self, x):
			
 
				-        h = self.cv2(self.cv1(x))
			
 
				+        # Use the fused conv once convert_to_deploy() has created it; otherwise sum both branches.
			
 
				+        if hasattr(self, 'conv'):
			
 
				+            y = self.conv(x)
			
 
				+        else:
			
 
				+            y = self.conv1(x) + self.conv2(x)
			
 
				 
			
 
				-        return x + h if self.shortcut else h
			
 
				+        return self.act(y)
			
 
				 
			
 
				-class RTCBlock(nn.Module):
			
 
				+    def convert_to_deploy(self):
			
 
				+        """Create ``self.conv`` if missing and load it with the fused kernel and bias."""
			
 
				+        if not hasattr(self, 'conv'):
			
 
				+            self.conv = nn.Conv2d(self.in_dim, self.out_dim, 3, 1, padding=1)
			
 
				+
			
 
				+        kernel, bias = self.get_equivalent_kernel_bias()
			
 
				+        self.conv.weight.data = kernel
			
 
				+        self.conv.bias.data = bias
			
 
				+
			
 
				+    def get_equivalent_kernel_bias(self):
			
 
				+        """Return (kernel, bias) of the single 3x3 conv equivalent to conv1 + conv2."""
			
 
				+        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
			
 
				+        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
			
 
				+
			
 
				+        return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1), bias3x3 + bias1x1
			
 
				+
			
 
				+    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
			
 
				+        """Zero-pad the kernel by one pixel on each spatial side; return 0 for ``None``."""
			
 
				+        if kernel1x1 is None:
			
 
				+            return 0
			
 
				+        else:
			
 
				+            return F.pad(kernel1x1, [1, 1, 1, 1])
			
 
				+
			
 
				+    def _fuse_bn_tensor(self, branch: BasicConv):
			
 
				+        """Fold the branch's norm statistics into its conv and return (kernel, bias)."""
			
 
				+        if branch is None:
			
 
				+            return 0, 0
			
 
				+        # NOTE(review): assumes branch.norm is BatchNorm-like (running_mean, running_var,
			
 
				+        # weight, bias, eps) - confirm this holds for every supported norm_type.
			
 
				+        kernel = branch.conv.weight
			
 
				+        running_mean = branch.norm.running_mean
			
 
				+        running_var = branch.norm.running_var
			
 
				+        gamma = branch.norm.weight
			
 
				+        beta = branch.norm.bias
			
 
				+        eps = branch.norm.eps
			
 
				+        std = (running_var + eps).sqrt()
			
 
				+        # Per-output-channel scale, shaped to broadcast over the kernel's remaining dims.
			
 
				+        t = (gamma / std).reshape(-1, 1, 1, 1)
			
 
				+
			
 
				+        return kernel * t, beta - running_mean * gamma / std
			
 
				+
			
 
				+class RepRTCBlock(nn.Module):
			
 
				     def __init__(self,
			
 
				                  in_dim,
			
 
				                  out_dim,
			
 
				-                 num_blocks = 1,
			
 
				-                 shortcut   = False,
			
 
				+                 num_blocks = 3,
			
 
				+                 expansion  = 1.0,
			
 
				                  act_type   = 'silu',
			
 
				                  norm_type  = 'BN',
			
 
				-                 depthwise  = False,):
			
 
				-        super(RTCBlock, self).__init__()
			
 
				-        self.inter_dim = out_dim // 2
			
 
				-        self.input_proj = BasicConv(in_dim, out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
			
 
				+                 ) -> None:
			
 
				+        super(RepRTCBlock, self).__init__()
			
 
				+        self.inter_dim = round(out_dim * expansion)
			
 
				+        self.input_proj = BasicConv(in_dim, self.inter_dim * 2, kernel_size=1, act_type=act_type, norm_type=norm_type)
			
 
				         self.m = nn.Sequential(*(
			
 
				-            Bottleneck(self.inter_dim, self.inter_dim, 1.0, [3, 3], shortcut, act_type, norm_type, depthwise)
			
 
				+            RepVggBlock(self.inter_dim, self.inter_dim, act_type, norm_type)
			
 
				             for _ in range(num_blocks)))
			
 
				         self.output_proj = BasicConv((2 + num_blocks) * self.inter_dim, out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
			
 
				 
			
@@ -303,3 +323,4 @@ class RTCBlock(nn.Module):
 
				         out = self.output_proj(torch.cat(out, dim=1))
			
 
				 
			
 
				         return out
			
 
				+
			
--- a/models/detectors/rtdetr/basic_modules/fpn.py
+++ b/models/detectors/rtdetr/basic_modules/fpn.py
@@ -4,10 +4,10 @@ import torch.nn.functional as F
 
				 from typing import List
			
 
				 
			
 
				 try:
			
 
				-    from .basic import BasicConv, RTCBlock
			
 
				+    from .basic import BasicConv, RepRTCBlock
			
 
				     from .transformer import TransformerEncoder
			
 
				 except:
			
 
				-    from  basic import BasicConv, RTCBlock
			
 
				+    from  basic import BasicConv, RepRTCBlock
			
 
				     from  transformer import TransformerEncoder
			
 
				 
			
 
				 
			
@@ -17,9 +17,9 @@ def build_fpn(cfg, in_dims, out_dim):
 
				         return HybridEncoder(in_dims     = in_dims,
			
 
				                              out_dim     = out_dim,
			
 
				                              num_blocks  = cfg['fpn_num_blocks'],
			
 
				+                             expansion   = cfg['fpn_expansion'],
			
 
				                              act_type    = cfg['fpn_act'],
			
 
				                              norm_type   = cfg['fpn_norm'],
			
 
				-                             depthwise   = cfg['fpn_depthwise'],
			
 
				                              num_heads   = cfg['en_num_heads'],
			
 
				                              num_layers  = cfg['en_num_layers'],
			
 
				                              ffn_dim     = cfg['en_ffn_dim'],
			
@@ -38,9 +38,9 @@ class HybridEncoder(nn.Module):
 
				                  in_dims        :List  = [256, 512, 1024],
			
 
				                  out_dim        :int   = 256,
			
 
				                  num_blocks     :int   = 3,
			
 
				+                 expansion      :float = 1.0,
			
 
				                  act_type       :str   = 'silu',
			
 
				                  norm_type      :str   = 'BN',
			
 
				-                 depthwise      :bool  = False,
			
 
				                  # Transformer's parameters
			
 
				                  num_heads      :int   = 8,
			
 
				                  num_layers     :int   = 1,
			
@@ -82,43 +82,39 @@ class HybridEncoder(nn.Module):
 
				 
			
 
				         # ---------------- Top down FPN ----------------
			
 
				         ## P5 -> P4
			
 
				-        self.top_down_layer_1 = RTCBlock(in_dim       = self.out_dim * 2,
			
 
				-                                         out_dim      = self.out_dim,
			
 
				-                                         num_blocks   = num_blocks,
			
 
				-                                         shortcut     = False,
			
 
				-                                         act_type     = act_type,
			
 
				-                                         norm_type    = norm_type,
			
 
				-                                         depthwise    = depthwise,
			
 
				-                                         )
			
 
				+        self.top_down_layer_1 = RepRTCBlock(in_dim     = self.out_dim * 2,
			
 
				+                                            out_dim     = self.out_dim,
			
 
				+                                            num_blocks  = num_blocks,
			
 
				+                                            expansion   = expansion,
			
 
				+                                            act_type    = act_type,
			
 
				+                                            norm_type   = norm_type,
			
 
				+                                           )
			
 
				         ## P4 -> P3
			
 
				-        self.top_down_layer_2 = RTCBlock(in_dim       = self.out_dim * 2,
			
 
				-                                         out_dim      = self.out_dim,
			
 
				-                                         num_blocks   = num_blocks,
			
 
				-                                         shortcut     = False,
			
 
				-                                         act_type     = act_type,
			
 
				-                                         norm_type    = norm_type,
			
 
				-                                         depthwise    = depthwise,
			
 
				-                                         )
			
 
				+        self.top_down_layer_2 = RepRTCBlock(in_dim     = self.out_dim * 2,
			
 
				+                                            out_dim     = self.out_dim,
			
 
				+                                            num_blocks  = num_blocks,
			
 
				+                                            expansion   = expansion,
			
 
				+                                            act_type    = act_type,
			
 
				+                                            norm_type   = norm_type,
			
 
				+                                            )
			
 
				         
			
 
				         # ---------------- Bottom up PAN----------------
			
 
				         ## P3 -> P4
			
 
				-        self.bottom_up_layer_1 = RTCBlock(in_dim       = self.out_dim * 2,
			
 
				-                                          out_dim      = self.out_dim,
			
 
				-                                          num_blocks   = num_blocks,
			
 
				-                                          shortcut     = False,
			
 
				-                                          act_type     = act_type,
			
 
				-                                          norm_type    = norm_type,
			
 
				-                                          depthwise    = depthwise,
			
 
				-                                          )
			
 
				+        self.bottom_up_layer_1 = RepRTCBlock(in_dim      = self.out_dim * 2,
			
 
				+                                             out_dim     = self.out_dim,
			
 
				+                                             num_blocks  = num_blocks,
			
 
				+                                             expansion   = expansion,
			
 
				+                                             act_type    = act_type,
			
 
				+                                             norm_type   = norm_type,
			
 
				+                                             )
			
 
				         ## P4 -> P5
			
 
				-        self.bottom_up_layer_2 = RTCBlock(in_dim       = self.out_dim * 2,
			
 
				-                                          out_dim      = self.out_dim,
			
 
				-                                          num_blocks   = num_blocks,
			
 
				-                                          shortcut     = False,
			
 
				-                                          act_type     = act_type,
			
 
				-                                          norm_type    = norm_type,
			
 
				-                                          depthwise    = depthwise,
			
 
				-                                          )
			
 
				+        self.bottom_up_layer_2 = RepRTCBlock(in_dim      = self.out_dim * 2,
			
 
				+                                             out_dim     = self.out_dim,
			
 
				+                                             num_blocks  = num_blocks,
			
 
				+                                             expansion   = expansion,
			
 
				+                                             act_type    = act_type,
			
 
				+                                             norm_type   = norm_type,
			
 
				+                                             )
			
 
				 
			
 
				         self.init_weights()
			
 
				   
			
@@ -169,6 +165,7 @@ if __name__ == '__main__':
 
				         'fpn_norm': 'BN',
			
 
				         'fpn_depthwise': False,
			
 
				         'fpn_num_blocks': 3,
			
 
				+        'fpn_expansion': 1.0,
			
 
				         'en_num_heads': 8,
			
 
				         'en_num_layers': 1,
			
 
				         'en_ffn_dim': 1024,