add HeadConv in RTCDet PaFPN

yjh0410 committed 2 years ago
commit 9065e15f1a

+ 11 - 11
models/detectors/rtcdet/rtcdet_pafpn.py

@@ -14,14 +14,6 @@ class RTCDetPaFPN(nn.Module):
         super(RTCDetPaFPN, self).__init__()
         # --------------------------- Basic Parameters ---------------------------
         self.in_dims = in_dims
-        self.fpn_dims = [round(256*cfg['width']), round(512*cfg['width']), round(1024*cfg['width'])]
-
-        # --------------------------- Input proj ---------------------------
-        if in_dims == self.fpn_dims:
-            self.input_projs = nn.ModuleList([nn.Identity() for _ in range(len(in_dims))])
-        else:
-            self.input_projs = nn.ModuleList([nn.Conv2d(in_dim, fpn_dim, kernel_size=1)
-                                            for in_dim, fpn_dim in zip(in_dims, self.fpn_dims)])
                 
         # --------------------------- Top-down FPN ---------------------------
         ## P5 -> P4
@@ -43,15 +35,20 @@ class RTCDetPaFPN(nn.Module):
         self.downsample_layer_2 = build_downsample_layer(cfg, round(256*cfg['width']), round(512*cfg['width']))
         self.bottom_up_layer_2 = build_fpn_block(cfg, round(512*cfg['width']) + in_dims[2], round(512*cfg['width']))
                 
+        ## Head convs
+        self.head_conv_1 = Conv(round(128*cfg['width']), round(256*cfg['width']), k=3, s=1, p=1, act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm'])
+        self.head_conv_2 = Conv(round(256*cfg['width']), round(512*cfg['width']), k=3, s=1, p=1, act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm'])
+        self.head_conv_3 = Conv(round(512*cfg['width']), round(1024*cfg['width']), k=3, s=1, p=1, act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm'])
+        
         # --------------------------- Output proj ---------------------------
         if out_dim is not None:
             self.out_layers = nn.ModuleList([
                 Conv(in_dim, out_dim, k=1, act_type=cfg['fpn_act'], norm_type=cfg['fpn_norm'])
-                for in_dim in [round(128*cfg['width']), round(256*cfg['width']), round(512*cfg['width'])]])
+                for in_dim in [round(256*cfg['width']), round(512*cfg['width']), round(1024*cfg['width'])]])
             self.out_dim = [out_dim] * 3
         else:
             self.out_layers = None
-            self.out_dim = [round(128*cfg['width']), round(256*cfg['width']), round(512*cfg['width'])]
+            self.out_dim = [round(256*cfg['width']), round(512*cfg['width']), round(1024*cfg['width'])]
 
 
     def forward(self, fpn_feats):
@@ -79,7 +76,10 @@ class RTCDetPaFPN(nn.Module):
         c18 = torch.cat([c17, c5], dim=1)
         c19 = self.bottom_up_layer_2(c18)
 
-        out_feats = [c13, c16, c19] # [P3, P4, P5]
+        c20 = self.head_conv_1(c13)
+        c21 = self.head_conv_2(c16)
+        c22 = self.head_conv_3(c19)
+        out_feats = [c20, c21, c22] # [P3, P4, P5]
         
         # output proj layers
         if self.out_layers is not None:

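In effect, each PaFPN output (P3/P4/P5) now passes through an extra 3x3 conv that doubles its channel width before the optional 1x1 output projection, which is why out_dim changes from [128w, 256w, 512w] to [256w, 512w, 1024w]. A minimal sketch of that head-conv stage follows; the conv_bn_act helper and HeadConvs wrapper are simplified stand-ins for the repo's Conv block, not the actual implementation.

```python
# Sketch of the new head-conv stage (stand-in for the repo's Conv helper).
import torch
import torch.nn as nn

def conv_bn_act(c_in, c_out, k=3, s=1, p=1):
    # 3x3 Conv + BN + SiLU, mirroring act_type='silu', norm_type='BN'
    return nn.Sequential(
        nn.Conv2d(c_in, c_out, k, s, p, bias=False),
        nn.BatchNorm2d(c_out),
        nn.SiLU(inplace=True),
    )

class HeadConvs(nn.Module):
    """Doubles the channel width of each pyramid level before the detection head."""
    def __init__(self, width=1.0):
        super().__init__()
        self.head_conv_1 = conv_bn_act(round(128 * width), round(256 * width))   # P3
        self.head_conv_2 = conv_bn_act(round(256 * width), round(512 * width))   # P4
        self.head_conv_3 = conv_bn_act(round(512 * width), round(1024 * width))  # P5

    def forward(self, p3, p4, p5):
        return self.head_conv_1(p3), self.head_conv_2(p4), self.head_conv_3(p5)

# Shape check at width=1.0 with a 640x640 input (strides 8/16/32):
p3, p4, p5 = (torch.randn(1, 128, 80, 80),
              torch.randn(1, 256, 40, 40),
              torch.randn(1, 512, 20, 20))
o3, o4, o5 = HeadConvs()(p3, p4, p5)
print(o3.shape, o4.shape, o5.shape)  # 256, 512, 1024 channels
```
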
+ 2 - 2
models/detectors/yolov7/yolov7_backbone.py

@@ -209,13 +209,13 @@ if __name__ == '__main__':
     from thop import profile
     cfg = {
         'pretrained': True,
-        'backbone': 'elannet_huge',
+        'backbone': 'elannet_large',
         'bk_act': 'silu',
         'bk_norm': 'BN',
         'bk_dpw': False,
     }
     model, feats = build_backbone(cfg)
-    x = torch.randn(1, 3, 224, 224)
+    x = torch.randn(1, 3, 640, 640)
     t0 = time.time()
     outputs = model(x)
     t1 = time.time()

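This change profiles the 'elannet_large' backbone at the detector's 640x640 training resolution instead of 224x224. Below is a rough sketch of that kind of thop-based FLOPs/params/latency check, with a tiny hypothetical module standing in for the real build_backbone(cfg) model:

```python
# Sketch of a thop-based profiling check at 640x640 input.
# toy_backbone is a hypothetical stand-in, not elannet_large.
import time
import torch
import torch.nn as nn
from thop import profile  # pip install thop

toy_backbone = nn.Sequential(
    nn.Conv2d(3, 32, 3, 2, 1), nn.SiLU(),
    nn.Conv2d(32, 64, 3, 2, 1), nn.SiLU(),
)

x = torch.randn(1, 3, 640, 640)   # match the detector's input resolution
t0 = time.time()
with torch.no_grad():
    _ = toy_backbone(x)
print(f'forward time: {time.time() - t0:.4f}s')

macs, params = profile(toy_backbone, inputs=(x,))
print(f'MACs: {macs / 1e9:.2f} G, Params: {params / 1e6:.2f} M')
```
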
+ 1 - 1
models/detectors/yolox/README.md

@@ -11,4 +11,4 @@
 - For data augmentation, we use the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation.
 - For optimizer, we use SGD with weight decay 0.0005 and a base per-image lr of 0.01 / 64.
 - For learning rate scheduler, we use Cosine decay scheduler.
-- I am trying to retrain **YOLOX-M** and **YOLOX-L** with more GPUs, and I will update the AP of YOLOX-M and YOLOX-L in the table in the future.
+- The reason for the low performance of my reproduced **YOLOX-L** has not yet been identified.
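
As an aside on the "base per-image lr 0.01 / 64" setting above: it follows the usual linear-scaling convention, where the effective SGD learning rate is the per-image base multiplied by the total batch size. A small illustration (the scaled_lr helper is hypothetical, not from the repo):

```python
# Linear lr scaling: effective lr = (0.01 / 64) * total batch size.
def scaled_lr(batch_size, base_lr_per_image=0.01 / 64):
    return base_lr_per_image * batch_size

print(scaled_lr(64))   # 0.01   (the reference setting)
print(scaled_lr(16))   # 0.0025
```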